redo-log: prioritize the "foreground" process.

When running a parallel build, redo-log -f (which is auto-started by
redo) tries to traverse through the logs depth first, in the order
parent processes started subprocesses.  This works pretty well, but if
its dependencies are locked, a process might have to give up its
jobserver token while other stuff builds its dependencies.  After the
dependency finishes, the parent might not be able to get a token for
quite some time, and the logs will appear to stop.

To prevent this from happening, we can instantiate up to one "cheater"
token, only in the foreground process (the one locked by redo-log -f),
which will allow it to continue running, albeit a bit slowly (since it
only has one token out of possibly many).  When the process finishes,
we then destroy the fake token.  It gets a little complicated; see
explanation at the top of jwack.py.
This commit is contained in:
Avery Pennarun 2018-11-17 04:32:09 -05:00
commit 8b5a567b2e
7 changed files with 348 additions and 104 deletions

View file

@ -22,7 +22,7 @@ def _try_stat(filename):
log_reader_pid = None log_reader_pid = None
def start_stdin_log_reader(status, details): def start_stdin_log_reader(status, details, debug_locks, debug_pids):
if vars.RAW_LOGS: return if vars.RAW_LOGS: return
global log_reader_pid global log_reader_pid
r, w = os.pipe() # main pipe to redo-log r, w = os.pipe() # main pipe to redo-log
@ -47,7 +47,7 @@ def start_stdin_log_reader(status, details):
os.dup2(w, 1) os.dup2(w, 1)
os.dup2(w, 2) os.dup2(w, 2)
os.close(w) os.close(w)
check_tty() check_tty(sys.stderr)
else: else:
# child # child
try: try:
@ -61,6 +61,8 @@ def start_stdin_log_reader(status, details):
'--ack-fd', str(aw), '--ack-fd', str(aw),
('--status' if status and os.isatty(2) else '--no-status'), ('--status' if status and os.isatty(2) else '--no-status'),
('--details' if details else '--no-details'), ('--details' if details else '--no-details'),
('--debug-locks' if debug_locks else '--no-debug-locks'),
('--debug-pids' if debug_pids else '--no-debug-pids'),
'-' '-'
] ]
os.execvp(argv[0], argv) os.execvp(argv[0], argv)
@ -182,7 +184,6 @@ class BuildJob:
] ]
if vars.VERBOSE: argv[1] += 'v' if vars.VERBOSE: argv[1] += 'v'
if vars.XTRACE: argv[1] += 'x' if vars.XTRACE: argv[1] += 'x'
if vars.VERBOSE or vars.XTRACE: logs.write('\n')
firstline = open(os.path.join(dodir, dofile)).readline().strip() firstline = open(os.path.join(dodir, dofile)).readline().strip()
if firstline.startswith('#!/'): if firstline.startswith('#!/'):
argv[0:2] = firstline[2:].split(' ') argv[0:2] = firstline[2:].split(' ')
@ -252,7 +253,7 @@ class BuildJob:
logf.close() logf.close()
signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE
if vars.VERBOSE or vars.XTRACE: if vars.VERBOSE or vars.XTRACE:
logs.write('* %s\n' % ' '.join(self.argv).replace('\n', ' ')) logs.write('* %s' % ' '.join(self.argv).replace('\n', ' '))
os.execvp(self.argv[0], self.argv) os.execvp(self.argv[0], self.argv)
# FIXME: it would be nice to log the exit code to logf. # FIXME: it would be nice to log the exit code to logf.
# But that would have to happen in the parent process, which doesn't # But that would have to happen in the parent process, which doesn't
@ -354,6 +355,27 @@ def main(targets, shouldbuildfunc):
if rv: if rv:
retcode[0] = 1 retcode[0] = 1
if vars.TARGET and not vars.UNLOCKED:
me = os.path.join(vars.STARTDIR,
os.path.join(vars.PWD, vars.TARGET))
myfile = state.File(name=me)
selflock = state.Lock(state.LOG_LOCK_MAGIC + myfile.id)
else:
selflock = myfile = me = None
def cheat():
if not selflock: return 0
selflock.trylock()
if not selflock.owned:
# redo-log already owns it: let's cheat.
# Give ourselves one extra token so that the "foreground" log
# can always make progress.
return 1
else:
# redo-log isn't watching us (yet)
selflock.unlock()
return 0
# In the first cycle, we just build as much as we can without worrying # In the first cycle, we just build as much as we can without worrying
# about any lock contention. If someone else has it locked, we move on. # about any lock contention. If someone else has it locked, we move on.
seen = {} seen = {}
@ -369,7 +391,7 @@ def main(targets, shouldbuildfunc):
seen[t] = 1 seen[t] = 1
if not jwack.has_token(): if not jwack.has_token():
state.commit() state.commit()
jwack.get_token(t) jwack.ensure_token_or_cheat(t, cheat)
if retcode[0] and not vars.KEEP_GOING: if retcode[0] and not vars.KEEP_GOING:
break break
if not state.check_sane(): if not state.check_sane():
@ -409,6 +431,8 @@ def main(targets, shouldbuildfunc):
while locked or jwack.running(): while locked or jwack.running():
state.commit() state.commit()
jwack.wait_all() jwack.wait_all()
assert jwack._mytokens == 0
jwack.ensure_token_or_cheat('self', cheat)
# at this point, we don't have any children holding any tokens, so # at this point, we don't have any children holding any tokens, so
# it's okay to block below. # it's okay to block below.
if retcode[0] and not vars.KEEP_GOING: if retcode[0] and not vars.KEEP_GOING:
@ -427,6 +451,8 @@ def main(targets, shouldbuildfunc):
import random import random
time.sleep(random.random() * min(backoff, 1.0)) time.sleep(random.random() * min(backoff, 1.0))
backoff *= 2 backoff *= 2
# after printing this line, redo-log will recurse into t,
# whether it's us building it, or someone else.
meta('waiting', _nice(t)) meta('waiting', _nice(t))
try: try:
lock.check() lock.check()
@ -436,12 +462,14 @@ def main(targets, shouldbuildfunc):
return retcode[0] return retcode[0]
# this sequence looks a little silly, but the idea is to # this sequence looks a little silly, but the idea is to
# give up our personal token while we wait for the lock to # give up our personal token while we wait for the lock to
# be released; but we should never run get_token() while # be released; but we should never run ensure_token() while
# holding a lock, or we could cause deadlocks. # holding a lock, or we could cause deadlocks.
jwack.release_mine() jwack.release_mine()
lock.waitlock() lock.waitlock()
# now t is definitely free, so we get to decide whether
# to build it.
lock.unlock() lock.unlock()
jwack.get_token(t) jwack.ensure_token_or_cheat(t, cheat)
lock.trylock() lock.trylock()
assert(lock.owned) assert(lock.owned)
meta('unlocked', _nice(t)) meta('unlocked', _nice(t))

330
jwack.py
View file

@ -1,13 +1,87 @@
# #
# beware the jobberwack # Implementation of a GNU make-compatible jobserver.
#
# The basic idea is that both ends of a pipe (tokenfds) are shared with all
# subprocesses. At startup, we write one "token" into the pipe for each
# configured job. (So eg. redo -j20 will put 20 tokens in the pipe.) In
# order to do work, you must first obtain a token, by reading the other
# end of the pipe. When you're done working, you write the token back into
# the pipe so that someone else can grab it.
#
# The toplevel process in the hierarchy is what creates the pipes in the
# first place. Then it puts the pipe file descriptor numbers into MAKEFLAGS,
# so that subprocesses can pull them back out.
#
# As usual, edge cases make all this a bit tricky:
#
# - Every process is defined as owning a token at startup time. This makes
# sense because it's backward compatible with single-process make: if a
# subprocess neither reads nor writes the pipe, then it has exactly one
# token, so it's allowed to do one thread of work.
#
# - Thus, for symmetry, processes also must own a token at exit time.
#
# - In turn, to make *that* work, a parent process must destroy *its* token
# upon launching a subprocess. (Destroy, not release, because the
# subprocess has created its own token.) It can try to obtain another
# token, but if none are available, it has to stop work until one of its
# subprocesses finishes. When the subprocess finishes, its token is
# destroyed, so the parent creates a new one.
#
# - If our process is going to stop and wait for a lock (eg. because we
# depend on a target and someone else is already building that target),
# we must give up our token. Otherwise, we're sucking up a "thread" (a
# unit of parallelism) just to do nothing. If enough processes are waiting
# on a particular lock, then the process building that target might end up
# with only a single token, and everything gets serialized.
#
# - Unfortunately this leads to a problem: if we give up our token, we then
# have to re-acquire a token before exiting, even if we want to exit with
# an error code.
#
# - redo-log wants to linearize output so that it always prints log messages
# in the order jobs were started; but because of the above, a job being
# logged might end up with no tokens for a long time, waiting for some
# other branch of the build to complete.
#
# As a result, we extend beyond GNU make's model and make things even more
# complicated. We add a second pipe, cheatfds, which we use to "cheat" on
# tokens if our particular job is in the foreground (ie. is the one
# currently being tailed by redo-log -f). We add at most one token per
# redo-log instance. If we are the foreground task, and we need a token,
# and we don't have a token, and we don't have any subtasks (because if we
# had a subtask, then we're not in the foreground), we synthesize our own
# token by incrementing _mytokens and _cheats, but we don't read from
# tokenfds. Then, when it's time to give up our token again, we also won't
# write back to tokenfds, so the synthesized token disappears.
#
# Of course, all that then leads to *another* problem: every process must
# hold a *real* token when it exits, because its parent has given up a
# *real* token in order to start this subprocess. If we're holding a cheat
# token when it's time to exit, then we can't meet this requirement. The
# obvious thing to do would be to give up the cheat token and wait for a
# real token, but that might take a very long time, and if we're the last
# thing preventing our parent from exiting, then redo-log will sit around
# following our parent until we finally get a token so we can exit,
# defeating the whole purpose of cheating. Instead of waiting, we write our
# "cheater" token to cheatfds. Then, any task, upon noticing one of its
# subprocesses has finished, will check to see if there are any tokens on
# cheatfds; if so, it will remove one of them and *not* re-create its
# child's token, thus destroying the cheater token from earlier, and restoring
# balance.
#
# Sorry this is so complicated. I couldn't think of a way to make it
# simpler :)
# #
import sys, os, errno, select, fcntl, signal import sys, os, errno, select, fcntl, signal
from helpers import atoi, close_on_exec from helpers import atoi, close_on_exec
import state import state, vars
_toplevel = 0 _toplevel = 0
_mytokens = 1 _mytokens = 1
_fds = None _cheats = 0
_tokenfds = None
_cheatfds = None
_waitfds = {} _waitfds = {}
@ -16,21 +90,52 @@ def _debug(s):
sys.stderr.write('jwack#%d: %s' % (os.getpid(),s)) sys.stderr.write('jwack#%d: %s' % (os.getpid(),s))
def _release(n): def _create_tokens(n):
global _mytokens, _cheats
assert n >= 0
assert _cheats >= 0
for _ in xrange(n):
if _cheats > 0:
_cheats -= 1
else:
_mytokens += 1
def _destroy_tokens(n):
global _mytokens global _mytokens
_mytokens += n assert _mytokens >= n
_debug('release(%d) -> %d\n' % (n, _mytokens)) _mytokens -= n
if _mytokens > 1:
os.write(_fds[1], 't' * (_mytokens-1))
_mytokens = 1 def _release(n):
global _mytokens, _cheats
assert n >= 0
assert _mytokens >= n
_debug('%d,%d -> release(%d)\n' % (_mytokens, _cheats, n))
n_to_share = 0
for _ in xrange(n):
_mytokens -= 1
if _cheats > 0:
_cheats -= 1
else:
n_to_share += 1
assert _mytokens >= 0
assert _cheats >= 0
if n_to_share:
_debug('PUT tokenfds %d\n' % n_to_share)
os.write(_tokenfds[1], 't' * n_to_share)
def _release_except_mine():
assert _mytokens > 0
_release(_mytokens - 1)
def release_mine(): def release_mine():
global _mytokens global _mytokens
assert(_mytokens >= 1) assert _mytokens >= 1
_mytokens -= 1 _debug('%d,%d -> release_mine()\n' % (_mytokens, _cheats))
_debug('release_mine() -> %d\n' % _mytokens) _release(1)
os.write(_fds[1], 't')
def _timeout(sig, frame): def _timeout(sig, frame):
@ -50,38 +155,51 @@ def _make_pipe(startfd):
def _try_read(fd, n): def _try_read(fd, n):
"""Try to read n bytes from fd. Returns: '' on EOF, None if EAGAIN."""
assert state.is_flushed()
# using djb's suggested way of doing non-blocking reads from a blocking # using djb's suggested way of doing non-blocking reads from a blocking
# socket: http://cr.yp.to/unix/nonblock.html # socket: http://cr.yp.to/unix/nonblock.html
# We can't just make the socket non-blocking, because we want to be # We can't just make the socket non-blocking, because we want to be
# compatible with GNU Make, and they can't handle it. # compatible with GNU Make, and they can't handle it.
r,w,x = select.select([fd], [], [], 0) r,w,x = select.select([fd], [], [], 0)
if not r: if not r:
return '' # try again return None # try again
# ok, the socket is readable - but some other process might get there # ok, the socket is readable - but some other process might get there
# first. We have to set an alarm() in case our read() gets stuck. # first. We have to set an alarm() in case our read() gets stuck.
assert(state.is_flushed())
oldh = signal.signal(signal.SIGALRM, _timeout) oldh = signal.signal(signal.SIGALRM, _timeout)
try: try:
signal.setitimer(signal.ITIMER_REAL, 0.01, 0.01) # emergency fallback signal.setitimer(signal.ITIMER_REAL, 0.01, 0.01) # emergency fallback
try: try:
b = os.read(_fds[0], 1) b = os.read(fd, 1)
except OSError, e: except OSError, e:
if e.errno in (errno.EAGAIN, errno.EINTR): if e.errno in (errno.EAGAIN, errno.EINTR):
# interrupted or it was nonblocking # interrupted or it was nonblocking
return '' # try again return None # try again
else: else:
raise raise
finally: finally:
signal.setitimer(signal.ITIMER_REAL, 0, 0) signal.setitimer(signal.ITIMER_REAL, 0, 0)
signal.signal(signal.SIGALRM, oldh) signal.signal(signal.SIGALRM, oldh)
return b and b or None # None means EOF return b
def _try_read_all(fd, n):
bb = ''
while 1:
b = _try_read(fd, n)
if not b:
break
bb += b
return bb
def setup(maxjobs): def setup(maxjobs):
global _fds, _toplevel global _tokenfds, _cheatfds, _toplevel
if _fds: assert maxjobs > 0
return # already set up assert not _tokenfds
_debug('setup(%d)\n' % maxjobs) _debug('setup(%d)\n' % maxjobs)
flags = ' ' + os.getenv('MAKEFLAGS', '') + ' ' flags = ' ' + os.getenv('MAKEFLAGS', '') + ' '
FIND1 = ' --jobserver-auth=' # renamed in GNU make 4.2 FIND1 = ' --jobserver-auth=' # renamed in GNU make 4.2
FIND2 = ' --jobserver-fds=' # fallback syntax FIND2 = ' --jobserver-fds=' # fallback syntax
@ -106,38 +224,64 @@ def setup(maxjobs):
raise ValueError('broken --jobserver-auth from make; prefix your Makefile rule with a "+"') raise ValueError('broken --jobserver-auth from make; prefix your Makefile rule with a "+"')
else: else:
raise raise
_fds = (a,b) _tokenfds = (a,b)
if maxjobs and not _fds:
cheats = os.getenv('REDO_CHEATFDS', '')
if cheats:
(a,b) = cheats.split(',', 1)
a = atoi(a)
b = atoi(b)
if a <= 0 or b <= 0:
raise ValueError('invalid REDO_CHEATFDS: %r' % cheats)
_cheatfds = (a,b)
if not _tokenfds:
# need to start a new server # need to start a new server
_toplevel = maxjobs _toplevel = maxjobs
_fds = _make_pipe(100) _tokenfds = _make_pipe(100)
_release(maxjobs-1) _create_tokens(maxjobs - 1)
_release_except_mine()
os.putenv('MAKEFLAGS', os.putenv('MAKEFLAGS',
'%s -j --jobserver-auth=%d,%d --jobserver-fds=%d,%d' % '%s -j --jobserver-auth=%d,%d --jobserver-fds=%d,%d' %
(os.getenv('MAKEFLAGS', ''), (os.getenv('MAKEFLAGS', ''),
_fds[0], _fds[1], _tokenfds[0], _tokenfds[1],
_fds[0], _fds[1])) _tokenfds[0], _tokenfds[1]))
_cheatfds = _make_pipe(102)
os.putenv('REDO_CHEATFDS', '%d,%d' % (_cheatfds[0], _cheatfds[1]))
def wait(want_token): def _wait(want_token, max_delay):
rfds = _waitfds.keys() rfds = _waitfds.keys()
if _fds and want_token: if want_token:
rfds.append(_fds[0]) rfds.append(_tokenfds[0])
assert(rfds) assert rfds
assert(state.is_flushed()) assert state.is_flushed()
r,w,x = select.select(rfds, [], []) r,w,x = select.select(rfds, [], [], max_delay)
_debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, r)) _debug('_tokenfds=%r; wfds=%r; readable: %r\n' % (_tokenfds, _waitfds, r))
for fd in r: for fd in r:
if _fds and fd == _fds[0]: if fd == _tokenfds[0]:
pass pass
else: else:
pd = _waitfds[fd] pd = _waitfds[fd]
_debug("done: %r\n" % pd.name) _debug("done: %r\n" % pd.name)
_release(1) # redo subprocesses are expected to die without releasing their
# tokens, so things are less likely to get confused if they
# die abnormally. That means a token has 'disappeared' and we
# now need to recreate it.
b = _try_read(_cheatfds[0], 1)
_debug('GOT cheatfd\n')
if b == None:
_create_tokens(1)
if has_token():
_release_except_mine()
else:
# someone exited with _cheats > 0, so we need to compensate
# by *not* re-creating a token now.
pass
os.close(fd) os.close(fd)
del _waitfds[fd] del _waitfds[fd]
rv = os.waitpid(pd.pid, 0) rv = os.waitpid(pd.pid, 0)
assert(rv[0] == pd.pid) assert rv[0] == pd.pid
_debug("done1: rv=%r\n" % (rv,)) _debug("done1: rv=%r\n" % (rv,))
rv = rv[1] rv = rv[1]
if os.WIFEXITED(rv): if os.WIFEXITED(rv):
@ -149,36 +293,59 @@ def wait(want_token):
def has_token(): def has_token():
assert _mytokens >= 0
if _mytokens >= 1: if _mytokens >= 1:
return True return True
def get_token(reason): def ensure_token(reason, max_delay=None):
assert(state.is_flushed())
global _mytokens global _mytokens
assert(_mytokens <= 1) assert state.is_flushed()
setup(1) assert _mytokens <= 1
while 1: while 1:
if _mytokens >= 1: if _mytokens >= 1:
_debug("_mytokens is %d\n" % _mytokens) _debug("_mytokens is %d\n" % _mytokens)
assert(_mytokens == 1) assert _mytokens == 1
_debug('(%r) used my own token...\n' % reason) _debug('(%r) used my own token...\n' % reason)
break break
assert(_mytokens < 1) assert _mytokens < 1
_debug('(%r) waiting for tokens...\n' % reason) _debug('(%r) waiting for tokens...\n' % reason)
wait(want_token=1) _wait(want_token=1, max_delay=max_delay)
if _mytokens >= 1: if _mytokens >= 1:
break break
assert(_mytokens < 1) assert _mytokens < 1
if _fds: b = _try_read(_tokenfds[0], 1)
b = _try_read(_fds[0], 1) _debug('GOT tokenfd\n')
if b == None: if b == '':
raise Exception('unexpected EOF on token read') raise Exception('unexpected EOF on token read')
if b: if b:
_mytokens += 1 _mytokens += 1
_debug('(%r) got a token (%r).\n' % (reason, b)) _debug('(%r) got a token (%r).\n' % (reason, b))
break break
assert(_mytokens <= 1) if max_delay != None:
break
assert _mytokens <= 1
def ensure_token_or_cheat(reason, cheatfunc):
global _mytokens, _cheats
backoff = 0.01
while not has_token():
while running() and not has_token():
# If we already have a subproc running, then effectively we
# already have a token. Don't create a cheater token unless
# we're completely idle.
ensure_token(reason, max_delay=None)
ensure_token(reason, max_delay=min(1.0, backoff))
backoff *= 2
if not has_token():
assert _mytokens == 0
n = cheatfunc()
_debug('%s: %s: cheat = %d\n' % (vars.TARGET, reason, n))
if n > 0:
_mytokens += n
_cheats += n
break
def running(): def running():
@ -186,37 +353,50 @@ def running():
def wait_all(): def wait_all():
_debug("wait_all\n") _debug("%d,%d -> wait_all\n" % (_mytokens, _cheats))
assert(state.is_flushed()) assert state.is_flushed()
while running(): while 1:
while _mytokens >= 1: while _mytokens >= 1:
release_mine() release_mine()
if not running():
break
_debug("wait_all: wait()\n") _debug("wait_all: wait()\n")
wait(want_token=0) _wait(want_token=0, max_delay=None)
_debug("wait_all: empty list\n") _debug("wait_all: empty list\n")
get_token('self') # get my token back
if _toplevel: if _toplevel:
bb = '' # If we're the toplevel and we're sure no child processes remain,
while 1: # then we know we're totally idle. Self-test to ensure no tokens
b = _try_read(_fds[0], 8192) # mysteriously got created/destroyed.
bb += b tokens = _try_read_all(_tokenfds[0], 8192)
if not b: break cheats = _try_read_all(_cheatfds[0], 8192)
if len(bb) != _toplevel-1: _debug('toplevel: GOT %d tokens and %d cheats\n'
raise Exception('on exit: expected %d tokens; found %r' % (len(tokens), len(cheats)))
% (_toplevel-1, len(bb))) if len(tokens) - len(cheats) != _toplevel:
os.write(_fds[1], bb) raise Exception('on exit: expected %d tokens; found %r-%r'
% (_toplevel, len(tokens), len(cheats)))
os.write(_tokenfds[1], tokens)
# note: when we return, we have *no* tokens, not even our own!
# If caller wants to continue, they have to obtain one right away.
def force_return_tokens(): def force_return_tokens():
n = len(_waitfds) n = len(_waitfds)
if n: _debug('%d,%d -> %d jobs left in force_return_tokens\n'
_debug('%d tokens left in force_return_tokens\n' % n) % (_mytokens, _cheats, n))
_debug('returning %d tokens\n' % n)
for k in _waitfds.keys(): for k in _waitfds.keys():
del _waitfds[k] del _waitfds[k]
if _fds: _create_tokens(n)
_release(n) if has_token():
assert(state.is_flushed()) _release_except_mine()
assert _mytokens == 1, 'mytokens=%d' % _mytokens
assert _cheats <= _mytokens, 'mytokens=%d cheats=%d' % (_mytokens, _cheats)
assert _cheats in (0, 1), 'cheats=%d' % _cheats
if _cheats:
_debug('%d,%d -> force_return_tokens: recovering final token\n'
% (_mytokens, _cheats))
_destroy_tokens(_cheats)
os.write(_cheatfds[1], 't' * _cheats)
assert state.is_flushed()
def _pre_job(r, w, pfn): def _pre_job(r, w, pfn):
@ -237,13 +417,13 @@ class Job:
def start_job(reason, jobfunc, donefunc): def start_job(reason, jobfunc, donefunc):
assert(state.is_flushed()) assert state.is_flushed()
global _mytokens global _mytokens
assert(_mytokens <= 1) assert _mytokens <= 1
get_token(reason) assert _mytokens == 1
assert(_mytokens >= 1) # Subprocesses always start with 1 token, so we have to destroy ours
assert(_mytokens == 1) # in order for the universe to stay in balance.
_mytokens -= 1 _destroy_tokens(1)
r,w = _make_pipe(50) r,w = _make_pipe(50)
pid = os.fork() pid = os.fork()
if pid == 0: if pid == 0:

View file

@ -2,9 +2,9 @@ import os, re, sys, time
import vars import vars
def check_tty(): def check_tty(file):
global RED, GREEN, YELLOW, BOLD, PLAIN global RED, GREEN, YELLOW, BOLD, PLAIN
if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb': if file.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
# ...use ANSI formatting codes. # ...use ANSI formatting codes.
RED = "\x1b[31m" RED = "\x1b[31m"
GREEN = "\x1b[32m" GREEN = "\x1b[32m"
@ -52,14 +52,13 @@ class PrettyLog(object):
sys.stderr.flush() sys.stderr.flush()
g = REDO_RE.match(s) g = REDO_RE.match(s)
if g: if g:
# FIXME: support vars.DEBUG_PIDS somewhere
all = g.group(0) all = g.group(0)
self.file.write(s[:-len(all)]) self.file.write(s[:-len(all)])
words = g.group(1).split(':') words = g.group(1).split(':')
text = g.group(2) text = g.group(2)
kind, pid, when = words[0:3] kind, pid, when = words[0:3]
pid = int(pid)
if kind == 'unchanged': if kind == 'unchanged':
if vars.DEBUG >= 1:
self._pretty(pid, '', '%s (unchanged)' % text) self._pretty(pid, '', '%s (unchanged)' % text)
elif kind == 'check': elif kind == 'check':
self._pretty(pid, GREEN, '(%s)' % text) self._pretty(pid, GREEN, '(%s)' % text)
@ -102,7 +101,7 @@ _log = None
def setup(file, pretty): def setup(file, pretty):
global _log global _log
if pretty: if pretty:
check_tty() check_tty(file)
_log = PrettyLog(file=file) _log = PrettyLog(file=file)
else: else:
_log = RawLog(file=file) _log = RawLog(file=file)

View file

@ -1,5 +1,5 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import sys, os import sys, os, traceback
import vars_init import vars_init
vars_init.init(sys.argv[1:]) vars_init.init(sys.argv[1:])
@ -20,7 +20,8 @@ def should_build(t):
rv = 202 rv = 202
try: try:
if vars_init.is_toplevel: if vars_init.is_toplevel:
builder.start_stdin_log_reader(status=True, details=True) builder.start_stdin_log_reader(status=True, details=True,
debug_locks=False, debug_pids=False)
if vars.TARGET and not vars.UNLOCKED: if vars.TARGET and not vars.UNLOCKED:
me = os.path.join(vars.STARTDIR, me = os.path.join(vars.STARTDIR,
os.path.join(vars.PWD, vars.TARGET)) os.path.join(vars.PWD, vars.TARGET))
@ -29,6 +30,7 @@ try:
else: else:
f = me = None f = me = None
debug2('redo-ifchange: not adding depends.\n') debug2('redo-ifchange: not adding depends.\n')
jwack.setup(1)
try: try:
targets = sys.argv[1:] targets = sys.argv[1:]
if f: if f:
@ -41,7 +43,12 @@ try:
try: try:
state.rollback() state.rollback()
finally: finally:
try:
jwack.force_return_tokens() jwack.force_return_tokens()
except Exception, e:
traceback.print_exc(100, sys.stderr)
err('unexpected error: %r\n' % e)
rv = 1
except KeyboardInterrupt: except KeyboardInterrupt:
if vars_init.is_toplevel: if vars_init.is_toplevel:
builder.await_log_reader() builder.await_log_reader()

View file

@ -12,6 +12,8 @@ no-details only show 'redo' recursion trace, not build output
no-colorize don't colorize 'redo' log messages no-colorize don't colorize 'redo' log messages
no-status don't display build summary line in --follow no-status don't display build summary line in --follow
raw-logs don't format logs, just send raw output straight to stdout raw-logs don't format logs, just send raw output straight to stdout
debug-locks print messages about file locking (useful for debugging)
debug-pids print process ids in log messages (useful for debugging)
ack-fd= (internal use only) print REDO-OK to this fd upon starting ack-fd= (internal use only) print REDO-OK to this fd upon starting
""" """
o = options.Options(optspec) o = options.Options(optspec)
@ -75,6 +77,7 @@ def catlog(t):
if t == '-': if t == '-':
f = sys.stdin f = sys.stdin
fid = None fid = None
loglock = None
logname = None logname = None
else: else:
try: try:
@ -86,6 +89,8 @@ def catlog(t):
del sf del sf
state.rollback() state.rollback()
logname = state.logname(fid) logname = state.logname(fid)
loglock = state.Lock(fid + state.LOG_LOCK_MAGIC)
loglock.waitlock(shared=True)
f = None f = None
delay = 0.01 delay = 0.01
was_locked = is_locked(fid) was_locked = is_locked(fid)
@ -161,20 +166,31 @@ def catlog(t):
kind, pid, when = words.split(':')[0:3] kind, pid, when = words.split(':')[0:3]
if kind == 'unchanged': if kind == 'unchanged':
if opt.unchanged: if opt.unchanged:
if text not in already: if opt.debug_locks:
logs.write(line.rstrip()) logs.write(line.rstrip())
elif text not in already:
logs.meta('do', text)
if opt.recursive: if opt.recursive:
if loglock: loglock.unlock()
catlog(text) catlog(text)
elif kind in ('do', 'waiting'): if loglock: loglock.waitlock(shared=True)
elif kind in ('do', 'waiting', 'locked', 'unlocked'):
if opt.debug_locks:
logs.write(line.rstrip()) logs.write(line.rstrip())
elif text not in already:
logs.meta('do', text)
if opt.recursive: if opt.recursive:
assert text assert text
if loglock: loglock.unlock()
catlog(text) catlog(text)
if loglock: loglock.waitlock(shared=True)
else: else:
logs.write(line.rstrip()) logs.write(line.rstrip())
else: else:
if opt.details: if opt.details:
logs.write(line.rstrip()) logs.write(line.rstrip())
if loglock:
loglock.unlock()
if status: if status:
sys.stdout.flush() sys.stdout.flush()
sys.stderr.write('\r%-*.*s\r' % (width, width, '')) sys.stderr.write('\r%-*.*s\r' % (width, width, ''))
@ -197,6 +213,10 @@ try:
logs.setup(file=sys.stdout, pretty=False) logs.setup(file=sys.stdout, pretty=False)
else: else:
logs.setup(file=sys.stdout, pretty=True) logs.setup(file=sys.stdout, pretty=True)
if opt.debug_locks:
vars.DEBUG_LOCKS = 1
if opt.debug_pids:
vars.DEBUG_PIDS = 1
if opt.ack_fd: if opt.ack_fd:
# Write back to owner, to let them know we started up okay and # Write back to owner, to let them know we started up okay and
# will be able to see their error output, so it's okay to close # will be able to see their error output, so it's okay to close

10
redo.py
View file

@ -1,5 +1,5 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import sys, os import sys, os, traceback
import options import options
from helpers import atoi from helpers import atoi
@ -54,7 +54,8 @@ from logs import warn, err
try: try:
if vars_init.is_toplevel: if vars_init.is_toplevel:
builder.start_stdin_log_reader(status=opt.status, details=opt.details) builder.start_stdin_log_reader(status=opt.status, details=opt.details,
debug_locks=opt.debug_locks, debug_pids=opt.debug_pids)
for t in targets: for t in targets:
if os.path.exists(t): if os.path.exists(t):
f = state.File(name=t) f = state.File(name=t)
@ -75,7 +76,12 @@ try:
try: try:
state.rollback() state.rollback()
finally: finally:
try:
jwack.force_return_tokens() jwack.force_return_tokens()
except Exception, e:
traceback.print_exc(100, sys.stderr)
err('unexpected error: %r\n' % e)
retcode = 1
if vars_init.is_toplevel: if vars_init.is_toplevel:
builder.await_log_reader() builder.await_log_reader()
sys.exit(retcode) sys.exit(retcode)

View file

@ -21,6 +21,8 @@ ALWAYS='//ALWAYS' # an invalid filename that is always marked as dirty
STAMP_DIR='dir' # the stamp of a directory; mtime is unhelpful STAMP_DIR='dir' # the stamp of a directory; mtime is unhelpful
STAMP_MISSING='0' # the stamp of a nonexistent file STAMP_MISSING='0' # the stamp of a nonexistent file
LOG_LOCK_MAGIC=0x10000000 # fid offset for "log locks"
class CyclicDependencyError(Exception): pass class CyclicDependencyError(Exception): pass
@ -374,9 +376,11 @@ class Lock:
self.owned = True self.owned = True
return self.owned return self.owned
def waitlock(self): def waitlock(self, shared=False):
self.check() self.check()
fcntl.lockf(self.lockfile, fcntl.LOCK_EX, 0, 0) fcntl.lockf(self.lockfile,
fcntl.LOCK_SH if shared else fcntl.LOCK_EX,
0, 0)
self.owned = True self.owned = True
def unlock(self): def unlock(self):