redo-log: capture and linearize the output of redo builds.
redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
This commit is contained in:
parent
80aafaf290
commit
b2411fe483
10 changed files with 315 additions and 23 deletions
80
builder.py
80
builder.py
|
|
@ -1,7 +1,7 @@
|
|||
import sys, os, errno, random, stat, signal, time
|
||||
import vars, jwack, state, paths
|
||||
from helpers import unlink, close_on_exec, join
|
||||
from log import log, log_, debug, debug2, err, warn
|
||||
from log import log, log_, debug, debug2, err, warn, check_tty
|
||||
|
||||
|
||||
def _nice(t):
|
||||
|
|
@ -18,6 +18,72 @@ def _try_stat(filename):
|
|||
raise
|
||||
|
||||
|
||||
log_reader_pid = None
|
||||
|
||||
|
||||
def start_stdin_log_reader(status, details):
|
||||
if vars.RAW_LOGS: return
|
||||
global log_reader_pid
|
||||
r, w = os.pipe() # main pipe to redo-log
|
||||
ar, aw = os.pipe() # ack pipe from redo-log --ack-fd
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
pid = os.fork()
|
||||
if pid:
|
||||
# parent
|
||||
log_reader_pid = pid
|
||||
os.close(r)
|
||||
os.close(aw)
|
||||
b = os.read(ar, 8)
|
||||
if not b:
|
||||
# subprocess died without sending us anything: that's bad.
|
||||
err('failed to start redo-log subprocess; cannot continue.\n')
|
||||
os._exit(99)
|
||||
assert b == 'REDO-OK\n'
|
||||
# now we know the subproc is running and will report our errors
|
||||
# to stderr, so it's okay to lose our own stderr.
|
||||
os.close(ar)
|
||||
os.dup2(w, 1)
|
||||
os.dup2(w, 2)
|
||||
os.close(w)
|
||||
check_tty()
|
||||
else:
|
||||
# child
|
||||
try:
|
||||
os.close(ar)
|
||||
os.close(w)
|
||||
os.dup2(r, 0)
|
||||
os.close(r)
|
||||
argv = [
|
||||
'redo-log',
|
||||
'--recursive', '--follow',
|
||||
'--ack-fd', str(aw),
|
||||
('--status' if status else '--no-status'),
|
||||
('--details' if details else '--no-details'),
|
||||
'-'
|
||||
]
|
||||
os.execvp(argv[0], argv)
|
||||
except Exception, e:
|
||||
sys.stderr.write('redo-log: exec: %s\n' % e)
|
||||
finally:
|
||||
os._exit(99)
|
||||
|
||||
|
||||
def await_log_reader():
|
||||
if vars.RAW_LOGS: return
|
||||
global log_reader_pid
|
||||
if log_reader_pid > 0:
|
||||
# never actually close fd#1 or fd#2; insanity awaits.
|
||||
# replace it with something else instead.
|
||||
# Since our stdout/stderr are attached to redo-log's stdin,
|
||||
# this will notify redo-log that it's time to die (after it finishes
|
||||
# reading the logs)
|
||||
out = open('/dev/tty', 'w')
|
||||
os.dup2(out.fileno(), 1)
|
||||
os.dup2(out.fileno(), 2)
|
||||
os.waitpid(log_reader_pid, 0)
|
||||
|
||||
|
||||
class ImmediateReturn(Exception):
|
||||
def __init__(self, rv):
|
||||
Exception.__init__(self, "immediate return with exit code %d" % rv)
|
||||
|
|
@ -44,12 +110,14 @@ class BuildJob:
|
|||
assert(self.lock.owned)
|
||||
try:
|
||||
try:
|
||||
dirty = self.shouldbuildfunc(self.t)
|
||||
is_target, dirty = self.shouldbuildfunc(self.t)
|
||||
except state.CyclicDependencyError:
|
||||
err('cyclic dependency while checking %s\n' % _nice(self.t))
|
||||
raise ImmediateReturn(208)
|
||||
if not dirty:
|
||||
# target doesn't need to be built; skip the whole task
|
||||
if is_target and vars.DEBUG_LOCKS:
|
||||
log('[unchanged] %s\n' % _nice(self.t))
|
||||
return self._after2(0)
|
||||
except ImmediateReturn, e:
|
||||
return self._after2(e.rv)
|
||||
|
|
@ -117,6 +185,9 @@ class BuildJob:
|
|||
firstline = open(os.path.join(dodir, dofile)).readline().strip()
|
||||
if firstline.startswith('#!/'):
|
||||
argv[0:2] = firstline[2:].split(' ')
|
||||
# make sure to create the logfile *before* writing the log about it.
|
||||
# that way redo-log won't trace into an obsolete logfile.
|
||||
if not vars.RAW_LOGS: open(state.logname(self.sf.id), 'w')
|
||||
log('%s\n' % _nice(t))
|
||||
self.dodir = dodir
|
||||
self.basename = basename
|
||||
|
|
@ -173,6 +244,11 @@ class BuildJob:
|
|||
os.dup2(self.f.fileno(), 1)
|
||||
os.close(self.f.fileno())
|
||||
close_on_exec(1, False)
|
||||
if not vars.RAW_LOGS:
|
||||
logf = open(state.logname(self.sf.id), 'w')
|
||||
os.dup2(logf.fileno(), 2)
|
||||
close_on_exec(2, False)
|
||||
logf.close()
|
||||
signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE
|
||||
if vars.VERBOSE or vars.XTRACE: log_('* %s\n' % ' '.join(self.argv))
|
||||
os.execvp(self.argv[0], self.argv)
|
||||
|
|
|
|||
3
jwack.py
3
jwack.py
|
|
@ -37,6 +37,9 @@ def _timeout(sig, frame):
|
|||
pass
|
||||
|
||||
|
||||
# We make the pipes use the first available fd numbers starting at startfd.
|
||||
# This makes it easier to differentiate different kinds of pipes when using
|
||||
# strace.
|
||||
def _make_pipe(startfd):
|
||||
(a,b) = os.pipe()
|
||||
fds = (fcntl.fcntl(a, fcntl.F_DUPFD, startfd),
|
||||
|
|
|
|||
18
log.py
18
log.py
|
|
@ -1,20 +1,24 @@
|
|||
import sys, os
|
||||
import vars
|
||||
|
||||
# By default, no output colouring.
|
||||
RED = ""
|
||||
GREEN = ""
|
||||
YELLOW = ""
|
||||
BOLD = ""
|
||||
PLAIN = ""
|
||||
|
||||
if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
|
||||
def check_tty():
|
||||
global RED, GREEN, YELLOW, BOLD, PLAIN
|
||||
if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
|
||||
# ...use ANSI formatting codes.
|
||||
RED = "\x1b[31m"
|
||||
GREEN = "\x1b[32m"
|
||||
YELLOW = "\x1b[33m"
|
||||
BOLD = "\x1b[1m"
|
||||
PLAIN = "\x1b[m"
|
||||
else:
|
||||
RED = ""
|
||||
GREEN = ""
|
||||
YELLOW = ""
|
||||
BOLD = ""
|
||||
PLAIN = ""
|
||||
|
||||
check_tty()
|
||||
|
||||
|
||||
def log_(s):
|
||||
|
|
|
|||
|
|
@ -14,11 +14,13 @@ def should_build(t):
|
|||
raise builder.ImmediateReturn(32)
|
||||
dirty = deps.isdirty(f, depth = '', max_changed = vars.RUNID,
|
||||
already_checked=[])
|
||||
return dirty==[f] and deps.DIRTY or dirty
|
||||
return f.is_generated, dirty==[f] and deps.DIRTY or dirty
|
||||
|
||||
|
||||
rv = 202
|
||||
try:
|
||||
if vars_init.is_toplevel:
|
||||
builder.start_stdin_log_reader(status=True, details=True)
|
||||
if vars.TARGET and not vars.UNLOCKED:
|
||||
me = os.path.join(vars.STARTDIR,
|
||||
os.path.join(vars.PWD, vars.TARGET))
|
||||
|
|
@ -41,6 +43,10 @@ try:
|
|||
finally:
|
||||
jwack.force_return_tokens()
|
||||
except KeyboardInterrupt:
|
||||
if vars_init.is_toplevel:
|
||||
builder.await_log_reader()
|
||||
sys.exit(200)
|
||||
state.commit()
|
||||
if vars_init.is_toplevel:
|
||||
builder.await_log_reader()
|
||||
sys.exit(rv)
|
||||
|
|
|
|||
1
redo-log
Symbolic link
1
redo-log
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
redo-log.py
|
||||
175
redo-log.py
Executable file
175
redo-log.py
Executable file
|
|
@ -0,0 +1,175 @@
|
|||
#!/usr/bin/env python
|
||||
import errno, os, re, sys, time
|
||||
import options
|
||||
|
||||
optspec = """
|
||||
redo-log [options...] [targets...]
|
||||
--
|
||||
r,recursive show build logs for dependencies too
|
||||
u,unchanged show lines for dependencies not needing to be rebuilt
|
||||
f,follow keep watching for more lines to be appended (like tail -f)
|
||||
no-details only show 'redo' recursion trace, not build output
|
||||
no-colorize don't colorize 'redo' log messages
|
||||
no-status don't display build summary line in --follow
|
||||
ack-fd= (internal use only) print REDO-OK to this fd upon starting
|
||||
"""
|
||||
o = options.Options(optspec)
|
||||
(opt, flags, extra) = o.parse(sys.argv[1:])
|
||||
targets = extra
|
||||
|
||||
import vars_init
|
||||
vars_init.init(list(targets))
|
||||
|
||||
import vars, state
|
||||
|
||||
already = set()
|
||||
queue = []
|
||||
depth = []
|
||||
total_lines = 0
|
||||
status = None
|
||||
|
||||
|
||||
# regexp for matching "redo" lines in the log, which we use for recursion.
|
||||
# format:
|
||||
# redo path/to/target which might have spaces
|
||||
# redo [unchanged] path/to/target which might have spaces
|
||||
# redo path/to/target which might have spaces (comment)
|
||||
# FIXME: use a more structured format when writing the logs.
|
||||
# That will prevent false positives and negatives. Then transform the
|
||||
# structured format into a user-friendly format when printing in redo-log.
|
||||
REDO_LINE_RE = re.compile(r'^redo\s+(\[\w+\] )?([^(:]+)( \([^)]+\))?\n$')
|
||||
|
||||
|
||||
def is_locked(fid):
|
||||
return (fid is not None) and not state.Lock(fid=fid).trylock()
|
||||
|
||||
|
||||
def catlog(t):
|
||||
global total_lines, status
|
||||
if t in already:
|
||||
return
|
||||
depth.append(t)
|
||||
already.add(t)
|
||||
if t == '-':
|
||||
f = sys.stdin
|
||||
fid = None
|
||||
logname = None
|
||||
else:
|
||||
try:
|
||||
sf = state.File(name=t, allow_add=False)
|
||||
except KeyError:
|
||||
sys.stderr.write('redo-log: %r: not known to redo.\n' % (t,))
|
||||
sys.exit(24)
|
||||
fid = sf.id
|
||||
del sf
|
||||
state.rollback()
|
||||
logname = state.logname(fid)
|
||||
f = None
|
||||
delay = 0.01
|
||||
was_locked = is_locked(fid)
|
||||
line_head = ''
|
||||
while 1:
|
||||
if not f:
|
||||
try:
|
||||
f = open(logname)
|
||||
except IOError, e:
|
||||
if e.errno == errno.ENOENT:
|
||||
# ignore files without logs
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
if f:
|
||||
# Note: normally includes trailing \n.
|
||||
# In 'follow' mode, might get a line with no trailing \n
|
||||
# (eg. when ./configure is halfway through a test), which we
|
||||
# deal with below.
|
||||
line = f.readline()
|
||||
else:
|
||||
line = None
|
||||
if not line and (not opt.follow or not was_locked):
|
||||
# file not locked, and no new lines: done
|
||||
break
|
||||
if not line:
|
||||
was_locked = is_locked(fid)
|
||||
if opt.follow:
|
||||
if opt.status:
|
||||
# FIXME use actual terminal width here
|
||||
head = '[redo] %s ' % ('{:,}'.format(total_lines))
|
||||
tail = ''
|
||||
for i in reversed(depth):
|
||||
n = os.path.basename(i)
|
||||
if 65 - len(head) - len(tail) < len(n) + 1:
|
||||
tail = '... ' + tail
|
||||
break
|
||||
else:
|
||||
tail = n + ' ' + tail
|
||||
status = head + tail
|
||||
sys.stdout.flush()
|
||||
sys.stderr.write('\r%-70.70s\r' % status)
|
||||
time.sleep(min(delay, 1.0))
|
||||
delay += 0.01
|
||||
continue
|
||||
total_lines += 1
|
||||
delay = 0.01
|
||||
if not line.endswith('\n'):
|
||||
line_head += line
|
||||
continue
|
||||
if line_head:
|
||||
line = line_head + line
|
||||
line_head = ''
|
||||
if status:
|
||||
sys.stdout.flush()
|
||||
sys.stderr.write('\r%-70.70s\r' % '')
|
||||
status = None
|
||||
g = re.match(REDO_LINE_RE, line)
|
||||
if g:
|
||||
attr, name, comment = g.groups()
|
||||
if attr == '[unchanged] ':
|
||||
if opt.unchanged:
|
||||
if name not in already:
|
||||
sys.stdout.write(line)
|
||||
if opt.recursive:
|
||||
catlog(name)
|
||||
else:
|
||||
sys.stdout.write(line)
|
||||
if opt.recursive and (not comment or comment == ' (WAITING)'):
|
||||
assert name
|
||||
catlog(name)
|
||||
else:
|
||||
if opt.details:
|
||||
sys.stdout.write(line)
|
||||
if status:
|
||||
sys.stdout.flush()
|
||||
sys.stderr.write('\r%-70.70s\r' % '')
|
||||
status = None
|
||||
if line_head:
|
||||
# partial line never got terminated
|
||||
print line_head
|
||||
assert(depth[-1] == t)
|
||||
depth.pop(-1)
|
||||
|
||||
try:
|
||||
if not targets:
|
||||
sys.stderr.write('redo-log: give at least one target; maybe "all"?\n')
|
||||
sys.exit(1)
|
||||
if opt.status < 2 and not os.isatty(2):
|
||||
opt.status = False
|
||||
if opt.ack_fd:
|
||||
ack_fd = int(opt.ack_fd)
|
||||
assert(ack_fd > 2)
|
||||
if os.write(ack_fd, 'REDO-OK\n') != 8:
|
||||
raise Exception('write to ack_fd returned wrong length')
|
||||
os.close(ack_fd)
|
||||
queue += targets
|
||||
while queue:
|
||||
t = queue.pop(0)
|
||||
if t != '-':
|
||||
print 'redo %s' % t
|
||||
catlog(t)
|
||||
except KeyboardInterrupt:
|
||||
sys.exit(200)
|
||||
except IOError, e:
|
||||
if e.errno == errno.EPIPE:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
16
redo.py
16
redo.py
|
|
@ -12,6 +12,9 @@ v,verbose print commands as they are read from .do files (variables intact)
|
|||
x,xtrace print commands as they are executed (variables expanded)
|
||||
k,keep-going keep going as long as possible even if some targets fail
|
||||
shuffle randomize the build order to find dependency bugs
|
||||
no-details only show 'redo' recursion trace (to see more later, use redo-log)
|
||||
no-status don't display build summary line at the bottom of the screen
|
||||
raw-logs don't use redo-log, just send all output straight to stderr
|
||||
debug-locks print messages about file locking (useful for debugging)
|
||||
debug-pids print process ids as part of log messages (useful for debugging)
|
||||
version print the current version and exit
|
||||
|
|
@ -35,7 +38,10 @@ if opt.keep_going:
|
|||
os.environ['REDO_KEEP_GOING'] = '1'
|
||||
if opt.shuffle:
|
||||
os.environ['REDO_SHUFFLE'] = '1'
|
||||
if opt.debug_locks:
|
||||
if opt.raw_logs:
|
||||
os.environ['REDO_RAW_LOGS'] = '1'
|
||||
if opt.debug_locks or not os.environ.get('REDO_RAW_LOGS'):
|
||||
# FIXME: force-enabled for redo-log, for now
|
||||
os.environ['REDO_DEBUG_LOCKS'] = '1'
|
||||
if opt.debug_pids:
|
||||
os.environ['REDO_DEBUG_PIDS'] = '1'
|
||||
|
|
@ -47,6 +53,8 @@ import vars, state, builder, jwack
|
|||
from log import warn, err
|
||||
|
||||
try:
|
||||
if vars_init.is_toplevel:
|
||||
builder.start_stdin_log_reader(status=opt.status, details=opt.details)
|
||||
for t in targets:
|
||||
if os.path.exists(t):
|
||||
f = state.File(name=t)
|
||||
|
|
@ -61,13 +69,17 @@ try:
|
|||
jwack.setup(j)
|
||||
try:
|
||||
assert(state.is_flushed())
|
||||
retcode = builder.main(targets, lambda t: True)
|
||||
retcode = builder.main(targets, lambda t: (True, True))
|
||||
assert(state.is_flushed())
|
||||
finally:
|
||||
try:
|
||||
state.rollback()
|
||||
finally:
|
||||
jwack.force_return_tokens()
|
||||
if vars_init.is_toplevel:
|
||||
builder.await_log_reader()
|
||||
sys.exit(retcode)
|
||||
except KeyboardInterrupt:
|
||||
if vars_init.is_toplevel:
|
||||
builder.await_log_reader()
|
||||
sys.exit(200)
|
||||
|
|
|
|||
18
state.py
18
state.py
|
|
@ -174,7 +174,7 @@ class File(object):
|
|||
# use this mostly to avoid accidentally assigning to typos
|
||||
__slots__ = ['id'] + _file_cols[1:]
|
||||
|
||||
def _init_from_idname(self, id, name):
|
||||
def _init_from_idname(self, id, name, allow_add):
|
||||
q = ('select %s from Files ' % join(', ', _file_cols))
|
||||
if id != None:
|
||||
q += 'where rowid=?'
|
||||
|
|
@ -189,7 +189,9 @@ class File(object):
|
|||
row = d.execute(q, l).fetchone()
|
||||
if not row:
|
||||
if not name:
|
||||
raise Exception('No file with id=%r name=%r' % (id, name))
|
||||
raise KeyError('No file with id=%r name=%r' % (id, name))
|
||||
elif not allow_add:
|
||||
raise KeyError('No file with name=%r' % (name,))
|
||||
try:
|
||||
_write('insert into Files (name) values (?)', [name])
|
||||
except sqlite3.IntegrityError:
|
||||
|
|
@ -207,17 +209,17 @@ class File(object):
|
|||
if self.name == ALWAYS and self.changed_runid < vars.RUNID:
|
||||
self.changed_runid = vars.RUNID
|
||||
|
||||
def __init__(self, id=None, name=None, cols=None):
|
||||
def __init__(self, id=None, name=None, cols=None, allow_add=True):
|
||||
if cols:
|
||||
return self._init_from_cols(cols)
|
||||
else:
|
||||
return self._init_from_idname(id, name)
|
||||
return self._init_from_idname(id, name, allow_add=allow_add)
|
||||
|
||||
def __repr__(self):
|
||||
return "File(%r)" % (self.nicename(),)
|
||||
|
||||
def refresh(self):
|
||||
self._init_from_idname(self.id, None)
|
||||
self._init_from_idname(self.id, None, allow_add=False)
|
||||
|
||||
def save(self):
|
||||
cols = join(', ', ['%s=?'%i for i in _file_cols[2:]])
|
||||
|
|
@ -324,6 +326,11 @@ def files():
|
|||
yield File(cols=cols)
|
||||
|
||||
|
||||
def logname(fid):
|
||||
"""Given the id of a File, return the filename of its build log."""
|
||||
return os.path.join(vars.BASE, '.redo', 'log.%d' % fid)
|
||||
|
||||
|
||||
# FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python
|
||||
# doesn't do the lockdata structure in a portable way, so we have to use
|
||||
# fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's
|
||||
|
|
@ -365,6 +372,7 @@ class Lock:
|
|||
raise
|
||||
else:
|
||||
self.owned = True
|
||||
return self.owned
|
||||
|
||||
def waitlock(self):
|
||||
self.check()
|
||||
|
|
|
|||
1
vars.py
1
vars.py
|
|
@ -16,6 +16,7 @@ DEBUG_PIDS = os.environ.get('REDO_DEBUG_PIDS', '') and 1 or 0
|
|||
VERBOSE = os.environ.get('REDO_VERBOSE', '') and 1 or 0
|
||||
XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0
|
||||
KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0
|
||||
RAW_LOGS = os.environ.get('REDO_RAW_LOGS', '') and 1 or 0
|
||||
SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0
|
||||
STARTDIR = os.environ.get('REDO_STARTDIR', '')
|
||||
RUNID = atoi(os.environ.get('REDO_RUNID')) or None
|
||||
|
|
|
|||
|
|
@ -1,9 +1,13 @@
|
|||
import sys, os
|
||||
|
||||
|
||||
is_toplevel = False
|
||||
|
||||
|
||||
def init_no_state():
|
||||
if not os.environ.get('REDO'):
|
||||
os.environ['REDO'] = 'NOT_DEFINED'
|
||||
is_toplevel = True
|
||||
if not os.environ.get('REDO_BASE'):
|
||||
os.environ['REDO_BASE'] = 'NOT_DEFINED'
|
||||
|
||||
|
|
@ -11,6 +15,8 @@ def init_no_state():
|
|||
def init(targets):
|
||||
if not os.environ.get('REDO'):
|
||||
# toplevel call to redo
|
||||
global is_toplevel
|
||||
is_toplevel = True
|
||||
if len(targets) == 0:
|
||||
targets.append('all')
|
||||
exenames = [os.path.abspath(sys.argv[0]),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue