diff --git a/builder.py b/builder.py
index b85b28f..3da03eb 100644
--- a/builder.py
+++ b/builder.py
@@ -22,7 +22,7 @@ def _try_stat(filename):
 log_reader_pid = None
 
 
-def start_stdin_log_reader(status, details):
+def start_stdin_log_reader(status, details, debug_locks, debug_pids):
     if vars.RAW_LOGS: return
     global log_reader_pid
     r, w = os.pipe()    # main pipe to redo-log
@@ -47,7 +47,7 @@ def start_stdin_log_reader(status, details):
         os.dup2(w, 1)
         os.dup2(w, 2)
         os.close(w)
-        check_tty()
+        check_tty(sys.stderr)
     else:
         # child
         try:
@@ -61,6 +61,8 @@ def start_stdin_log_reader(status, details):
                 '--ack-fd', str(aw),
                 ('--status' if status and os.isatty(2) else '--no-status'),
                 ('--details' if details else '--no-details'),
+                ('--debug-locks' if debug_locks else '--no-debug-locks'),
+                ('--debug-pids' if debug_pids else '--no-debug-pids'),
                 '-'
             ]
             os.execvp(argv[0], argv)
@@ -182,7 +184,6 @@ class BuildJob:
                 ]
         if vars.VERBOSE: argv[1] += 'v'
         if vars.XTRACE: argv[1] += 'x'
-        if vars.VERBOSE or vars.XTRACE: logs.write('\n')
         firstline = open(os.path.join(dodir, dofile)).readline().strip()
         if firstline.startswith('#!/'):
             argv[0:2] = firstline[2:].split(' ')
@@ -252,7 +253,7 @@ class BuildJob:
             logf.close()
         signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # python ignores SIGPIPE
         if vars.VERBOSE or vars.XTRACE:
-            logs.write('* %s\n' % ' '.join(self.argv).replace('\n', ' '))
+            logs.write('* %s' % ' '.join(self.argv).replace('\n', ' '))
         os.execvp(self.argv[0], self.argv)
         # FIXME: it would be nice to log the exit code to logf.
         #  But that would have to happen in the parent process, which doesn't
@@ -353,6 +354,27 @@ def main(targets, shouldbuildfunc):
     def done(t, rv):
         if rv:
             retcode[0] = 1
+    
+    if vars.TARGET and not vars.UNLOCKED:
+        me = os.path.join(vars.STARTDIR, 
+                          os.path.join(vars.PWD, vars.TARGET))
+        myfile = state.File(name=me)
+        selflock = state.Lock(state.LOG_LOCK_MAGIC + myfile.id)
+    else:
+        selflock = myfile = me = None
+    
+    def cheat():
+        if not selflock: return 0
+        selflock.trylock()
+        if not selflock.owned:
+            # redo-log already owns it: let's cheat.
+            # Give ourselves one extra token so that the "foreground" log
+            # can always make progress.
+            return 1
+        else:
+            # redo-log isn't watching us (yet)
+            selflock.unlock()
+            return 0
 
     # In the first cycle, we just build as much as we can without worrying
     # about any lock contention.  If someone else has it locked, we move on.
@@ -369,7 +391,7 @@ def main(targets, shouldbuildfunc):
         seen[t] = 1
         if not jwack.has_token():
             state.commit()
-        jwack.get_token(t)
+        jwack.ensure_token_or_cheat(t, cheat)
         if retcode[0] and not vars.KEEP_GOING:
             break
         if not state.check_sane():
@@ -409,6 +431,8 @@ def main(targets, shouldbuildfunc):
     while locked or jwack.running():
         state.commit()
         jwack.wait_all()
+        assert jwack._mytokens == 0
+        jwack.ensure_token_or_cheat('self', cheat)
         # at this point, we don't have any children holding any tokens, so
         # it's okay to block below.
         if retcode[0] and not vars.KEEP_GOING:
@@ -427,6 +451,8 @@ def main(targets, shouldbuildfunc):
                 import random
                 time.sleep(random.random() * min(backoff, 1.0))
                 backoff *= 2
+                # after printing this line, redo-log will recurse into t,
+                # whether it's us building it, or someone else.
                 meta('waiting', _nice(t))
                 try:
                     lock.check()
@@ -436,12 +462,14 @@ def main(targets, shouldbuildfunc):
                     return retcode[0]
                 # this sequence looks a little silly, but the idea is to
                 # give up our personal token while we wait for the lock to
-                # be released; but we should never run get_token() while
+                # be released; but we should never run ensure_token() while
                 # holding a lock, or we could cause deadlocks.
                 jwack.release_mine()
                 lock.waitlock()
+                # now t is definitely free, so we get to decide whether
+                # to build it.
                 lock.unlock()
-                jwack.get_token(t)
+                jwack.ensure_token_or_cheat(t, cheat)
                 lock.trylock()
             assert(lock.owned)
             meta('unlocked', _nice(t))
diff --git a/jwack.py b/jwack.py
index 949b756..1aaf432 100644
--- a/jwack.py
+++ b/jwack.py
@@ -1,36 +1,141 @@
 #
-# beware the jobberwack
+# Implementation of a GNU make-compatible jobserver.
+#
+# The basic idea is that both ends of a pipe (tokenfds) are shared with all
+# subprocesses.  At startup, we write one "token" into the pipe for each
+# configured job. (So eg. redo -j20 will put 20 tokens in the pipe.)  In
+# order to do work, you must first obtain a token, by reading the other
+# end of the pipe.  When you're done working, you write the token back into
+# the pipe so that someone else can grab it.
+#
+# The toplevel process in the hierarchy is what creates the pipes in the
+# first place.  Then it puts the pipe file descriptor numbers into MAKEFLAGS,
+# so that subprocesses can pull them back out.
+#
+# As usual, edge cases make all this a bit tricky:
+#
+# - Every process is defined as owning a token at startup time.  This makes
+#   sense because it's backward compatible with single-process make: if a
+#   subprocess neither reads nor writes the pipe, then it has exactly one
+#   token, so it's allowed to do one thread of work.
+#
+# - Thus, for symmetry, processes also must own a token at exit time.
+#
+# - In turn, to make *that* work, a parent process must destroy *its* token
+#   upon launching a subprocess.  (Destroy, not release, because the
+#   subprocess has created its own token.) It can try to obtain another
+#   token, but if none are available, it has to stop work until one of its
+#   subprocesses finishes.  When the subprocess finishes, its token is
+#   destroyed, so the parent creates a new one.
+#
+# - If our process is going to stop and wait for a lock (eg. because we
+#   depend on a target and someone else is already building that target),
+#   we must give up our token.  Otherwise, we're sucking up a "thread" (a
+#   unit of parallelism) just to do nothing.  If enough processes are waiting
+#   on a particular lock, then the process building that target might end up
+#   with only a single token, and everything gets serialized.
+#
+# - Unfortunately this leads to a problem: if we give up our token, we then
+#   have to re-acquire a token before exiting, even if we want to exit with
+#   an error code.
+#
+# - redo-log wants to linearize output so that it always prints log messages
+#   in the order jobs were started; but because of the above, a job being
+#   logged might end up with no tokens for a long time, waiting for some
+#   other branch of the build to complete.
+#
+# As a result, we extend beyond GNU make's model and make things even more
+# complicated.  We add a second pipe, cheatfds, which we use to "cheat" on
+# tokens if our particular job is in the foreground (ie.  is the one
+# currently being tailed by redo-log -f).  We add at most one token per
+# redo-log instance.  If we are the foreground task, and we need a token,
+# and we don't have a token, and we don't have any subtasks (because if we
+# had a subtask, then we're not in the foreground), we synthesize our own
+# token by incrementing _mytokens and _cheats, but we don't read from
+# tokenfds.  Then, when it's time to give up our token again, we also won't
+# write back to tokenfds, so the synthesized token disappears.
+#
+# Of course, all that then leads to *another* problem: every process must
+# hold a *real* token when it exits, because its parent has given up a
+# *real* token in order to start this subprocess.  If we're holding a cheat
+# token when it's time to exit, then we can't meet this requirement.  The
+# obvious thing to do would be to give up the cheat token and wait for a
+# real token, but that might take a very long time, and if we're the last
+# thing preventing our parent from exiting, then redo-log will sit around
+# following our parent until we finally get a token so we can exit,
+# defeating the whole purpose of cheating.  Instead of waiting, we write our
+# "cheater" token to cheatfds.  Then, any task, upon noticing one of its
+# subprocesses has finished, will check to see if there are any tokens on
+# cheatfds; if so, it will remove one of them and *not* re-create its
+# child's token, thus destroying the cheater token from earlier, and restoring
+# balance.
+#
+# Sorry this is so complicated.  I couldn't think of a way to make it
+# simpler :)
 #
 import sys, os, errno, select, fcntl, signal
 from helpers import atoi, close_on_exec
-import state
+import state, vars
 
 _toplevel = 0
 _mytokens = 1
-_fds = None
+_cheats = 0
+_tokenfds = None
+_cheatfds = None
 _waitfds = {}
 
 
 def _debug(s):
     if 0:
         sys.stderr.write('jwack#%d: %s' % (os.getpid(),s))
-    
+
+
+def _create_tokens(n):
+    global _mytokens, _cheats
+    assert n >= 0
+    assert _cheats >= 0
+    for _ in xrange(n):
+        if _cheats > 0:
+            _cheats -= 1
+        else:
+            _mytokens += 1
+
+
+def _destroy_tokens(n):
+    global _mytokens
+    assert _mytokens >= n
+    _mytokens -= n
+
 
 def _release(n):
-    global _mytokens
-    _mytokens += n
-    _debug('release(%d) -> %d\n' % (n, _mytokens))
-    if _mytokens > 1:
-        os.write(_fds[1], 't' * (_mytokens-1))
-        _mytokens = 1
+    global _mytokens, _cheats
+    assert n >= 0
+    assert _mytokens >= n
+    _debug('%d,%d -> release(%d)\n' % (_mytokens, _cheats, n))
+    n_to_share = 0
+    for _ in xrange(n):
+        _mytokens -= 1
+        if _cheats > 0:
+            _cheats -= 1
+        else:
+            n_to_share += 1
+    assert _mytokens >= 0
+    assert _cheats >= 0
+    if n_to_share:
+        _debug('PUT tokenfds %d\n' % n_to_share)
+        os.write(_tokenfds[1], 't' * n_to_share)
+
+
+def _release_except_mine():
+    assert _mytokens > 0
+    _release(_mytokens - 1)
 
 
 def release_mine():
     global _mytokens
-    assert(_mytokens >= 1)
-    _mytokens -= 1
-    _debug('release_mine() -> %d\n' % _mytokens)
-    os.write(_fds[1], 't')
+    assert _mytokens >= 1
+    _debug('%d,%d -> release_mine()\n' % (_mytokens, _cheats))
+    _release(1)
 
 
 def _timeout(sig, frame):
@@ -50,38 +155,51 @@ def _make_pipe(startfd):
 
 
 def _try_read(fd, n):
+    """Try to read n bytes from fd.  Returns: '' on EOF, None if EAGAIN."""
+    assert state.is_flushed()
+
     # using djb's suggested way of doing non-blocking reads from a blocking
     # socket: http://cr.yp.to/unix/nonblock.html
     # We can't just make the socket non-blocking, because we want to be
     # compatible with GNU Make, and they can't handle it.
     r,w,x = select.select([fd], [], [], 0)
     if not r:
-        return ''  # try again
+        return None  # try again
     # ok, the socket is readable - but some other process might get there
     # first.  We have to set an alarm() in case our read() gets stuck.
-    assert(state.is_flushed())
     oldh = signal.signal(signal.SIGALRM, _timeout)
     try:
         signal.setitimer(signal.ITIMER_REAL, 0.01, 0.01)  # emergency fallback
         try:
-            b = os.read(_fds[0], 1)
+            b = os.read(fd, 1)
         except OSError, e:
             if e.errno in (errno.EAGAIN, errno.EINTR):
                 # interrupted or it was nonblocking
-                return ''  # try again
+                return None  # try again
             else:
                 raise
     finally:
         signal.setitimer(signal.ITIMER_REAL, 0, 0)
         signal.signal(signal.SIGALRM, oldh)
-    return b and b or None  # None means EOF
+    return b
+
+
+def _try_read_all(fd, n):
+    bb = ''
+    while 1:
+        b = _try_read(fd, n)
+        if not b:
+            break
+        bb += b
+    return bb
 
 
 def setup(maxjobs):
-    global _fds, _toplevel
-    if _fds:
-        return  # already set up
+    global _tokenfds, _cheatfds, _toplevel
+    assert maxjobs > 0
+    assert not _tokenfds
     _debug('setup(%d)\n' % maxjobs)
+    
     flags = ' ' + os.getenv('MAKEFLAGS', '') + ' '
     FIND1 = ' --jobserver-auth='  # renamed in GNU make 4.2
     FIND2 = ' --jobserver-fds='   # fallback syntax
@@ -106,38 +224,64 @@ def setup(maxjobs):
                 raise ValueError('broken --jobserver-auth from make; prefix your Makefile rule with a "+"')
             else:
                 raise
-        _fds = (a,b)
-    if maxjobs and not _fds:
+        _tokenfds = (a,b)
+    
+    cheats = os.getenv('REDO_CHEATFDS', '')
+    if cheats:
+        (a,b) = cheats.split(',', 1)
+        a = atoi(a)
+        b = atoi(b)
+        if a <= 0 or b <= 0:
+            raise ValueError('invalid REDO_CHEATFDS: %r' % cheats)
+        _cheatfds = (a,b)
+    
+    if not _tokenfds:
         # need to start a new server
         _toplevel = maxjobs
-        _fds = _make_pipe(100)
-        _release(maxjobs-1)
+        _tokenfds = _make_pipe(100)
+        _create_tokens(maxjobs - 1)
+        _release_except_mine()
         os.putenv('MAKEFLAGS',
                   '%s -j --jobserver-auth=%d,%d --jobserver-fds=%d,%d' %
                   (os.getenv('MAKEFLAGS', ''),
-                   _fds[0], _fds[1],
-                   _fds[0], _fds[1]))
+                   _tokenfds[0], _tokenfds[1],
+                   _tokenfds[0], _tokenfds[1]))
+        _cheatfds = _make_pipe(102)
+        os.putenv('REDO_CHEATFDS', '%d,%d' % (_cheatfds[0], _cheatfds[1]))
 
 
-def wait(want_token):
+def _wait(want_token, max_delay):
     rfds = _waitfds.keys()
-    if _fds and want_token:
-        rfds.append(_fds[0])
-    assert(rfds)
-    assert(state.is_flushed())
-    r,w,x = select.select(rfds, [], [])
-    _debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, r))
+    if want_token:
+        rfds.append(_tokenfds[0])
+    assert rfds
+    assert state.is_flushed()
+    r,w,x = select.select(rfds, [], [], max_delay)
+    _debug('_tokenfds=%r; wfds=%r; readable: %r\n' % (_tokenfds, _waitfds, r))
     for fd in r:
-        if _fds and fd == _fds[0]:
+        if fd == _tokenfds[0]:
             pass
         else:
             pd = _waitfds[fd]
             _debug("done: %r\n" % pd.name)
-            _release(1)
+            # redo subprocesses are expected to die without releasing their
+            # tokens, so things are less likely to get confused if they
+            # die abnormally.  That means a token has 'disappeared' and we
+            # now need to recreate it.
+            b = _try_read(_cheatfds[0], 1)
+            _debug('GOT cheatfd\n')
+            if b == None:
+                _create_tokens(1)
+                if has_token():
+                    _release_except_mine()
+            else:
+                # someone exited with _cheats > 0, so we need to compensate
+                # by *not* re-creating a token now.
+                pass
             os.close(fd)
             del _waitfds[fd]
             rv = os.waitpid(pd.pid, 0)
-            assert(rv[0] == pd.pid)
+            assert rv[0] == pd.pid
             _debug("done1: rv=%r\n" % (rv,))
             rv = rv[1]
             if os.WIFEXITED(rv):
@@ -149,36 +293,59 @@ def wait(want_token):
 
 
 def has_token():
+    assert _mytokens >= 0
     if _mytokens >= 1:
         return True
 
 
-def get_token(reason):
-    assert(state.is_flushed())
+def ensure_token(reason, max_delay=None):
     global _mytokens
-    assert(_mytokens <= 1)
-    setup(1)
+    assert state.is_flushed()
+    assert _mytokens <= 1
     while 1:
         if _mytokens >= 1:
             _debug("_mytokens is %d\n" % _mytokens)
-            assert(_mytokens == 1)
+            assert _mytokens == 1
             _debug('(%r) used my own token...\n' % reason)
             break
-        assert(_mytokens < 1)
+        assert _mytokens < 1
         _debug('(%r) waiting for tokens...\n' % reason)
-        wait(want_token=1)
+        _wait(want_token=1, max_delay=max_delay)
         if _mytokens >= 1:
             break
-        assert(_mytokens < 1)
-        if _fds:
-            b = _try_read(_fds[0], 1)
-            if b == None:
-                raise Exception('unexpected EOF on token read')
-            if b:
-                _mytokens += 1
-                _debug('(%r) got a token (%r).\n' % (reason, b))
+        assert _mytokens < 1
+        b = _try_read(_tokenfds[0], 1)
+        _debug('GOT tokenfd\n')
+        if b == '':
+            raise Exception('unexpected EOF on token read')
+        if b:
+            _mytokens += 1
+            _debug('(%r) got a token (%r).\n' % (reason, b))
+            break
+        if max_delay != None:
+            break
+    assert _mytokens <= 1
+
+
+def ensure_token_or_cheat(reason, cheatfunc):
+    global _mytokens, _cheats
+    backoff = 0.01
+    while not has_token():
+        while running() and not has_token():
+            # If we already have a subproc running, then effectively we
+            # already have a token.  Don't create a cheater token unless
+            # we're completely idle.
+            ensure_token(reason, max_delay=None)
+        ensure_token(reason, max_delay=min(1.0, backoff))
+        backoff *= 2
+        if not has_token():
+            assert _mytokens == 0
+            n = cheatfunc()
+            _debug('%s: %s: cheat = %d\n' % (vars.TARGET, reason, n))
+            if n > 0:
+                _mytokens += n
+                _cheats += n
                 break
-    assert(_mytokens <= 1)
 
 
 def running():
@@ -186,37 +353,50 @@ def running():
 
 
 def wait_all():
-    _debug("wait_all\n")
-    assert(state.is_flushed())
-    while running():
+    _debug("%d,%d -> wait_all\n" % (_mytokens, _cheats))
+    assert state.is_flushed()
+    while 1:
         while _mytokens >= 1:
             release_mine()
+        if not running():
+            break
         _debug("wait_all: wait()\n")
-        wait(want_token=0)
+        _wait(want_token=0, max_delay=None)
     _debug("wait_all: empty list\n")
-    get_token('self')  # get my token back
     if _toplevel:
-        bb = ''
-        while 1:
-            b = _try_read(_fds[0], 8192)
-            bb += b
-            if not b: break
-        if len(bb) != _toplevel-1:
-            raise Exception('on exit: expected %d tokens; found %r'
-                            % (_toplevel-1, len(bb)))
-        os.write(_fds[1], bb)
+        # If we're the toplevel and we're sure no child processes remain,
+        # then we know we're totally idle.  Self-test to ensure no tokens
+        # mysteriously got created/destroyed.
+        tokens = _try_read_all(_tokenfds[0], 8192)
+        cheats = _try_read_all(_cheatfds[0], 8192)
+        _debug('toplevel: GOT %d tokens and %d cheats\n'
+               % (len(tokens), len(cheats)))
+        if len(tokens) - len(cheats) != _toplevel:
+            raise Exception('on exit: expected %d tokens; found %r-%r'
+                            % (_toplevel, len(tokens), len(cheats)))
+        os.write(_tokenfds[1], tokens)
+    # note: when we return, we have *no* tokens, not even our own!
+    # If caller wants to continue, they have to obtain one right away.
 
 
 def force_return_tokens():
     n = len(_waitfds)
-    if n:
-        _debug('%d tokens left in force_return_tokens\n' % n)
-    _debug('returning %d tokens\n' % n)
+    _debug('%d,%d -> %d jobs left in force_return_tokens\n'
+           % (_mytokens, _cheats, n))
     for k in _waitfds.keys():
         del _waitfds[k]
-    if _fds:
-        _release(n)
-    assert(state.is_flushed())
+    _create_tokens(n)
+    if has_token():
+        _release_except_mine()
+        assert _mytokens == 1, 'mytokens=%d' % _mytokens
+    assert _cheats <= _mytokens, 'mytokens=%d cheats=%d' % (_mytokens, _cheats)
+    assert _cheats in (0, 1), 'cheats=%d' % _cheats
+    if _cheats:
+        _debug('%d,%d -> force_return_tokens: recovering final token\n'
+               % (_mytokens, _cheats))
+        _destroy_tokens(_cheats)
+        os.write(_cheatfds[1], 't' * _cheats)
+    assert state.is_flushed()
 
 
 def _pre_job(r, w, pfn):
@@ -237,13 +417,13 @@ class Job:
 
             
 def start_job(reason, jobfunc, donefunc):
-    assert(state.is_flushed())
+    assert state.is_flushed()
     global _mytokens
-    assert(_mytokens <= 1)
-    get_token(reason)
-    assert(_mytokens >= 1)
-    assert(_mytokens == 1)
-    _mytokens -= 1
+    assert _mytokens <= 1
+    assert _mytokens == 1
+    # Subprocesses always start with 1 token, so we have to destroy ours
+    # in order for the universe to stay in balance.
+    _destroy_tokens(1)
     r,w = _make_pipe(50)
     pid = os.fork()
     if pid == 0:
diff --git a/logs.py b/logs.py
index 2c093b6..08071af 100644
--- a/logs.py
+++ b/logs.py
@@ -2,9 +2,9 @@ import os, re, sys, time
 import vars
 
 
-def check_tty():
+def check_tty(file):
     global RED, GREEN, YELLOW, BOLD, PLAIN
-    if sys.stderr.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
+    if file.isatty() and (os.environ.get('TERM') or 'dumb') != 'dumb':
         # ...use ANSI formatting codes.
         RED    = "\x1b[31m"
         GREEN  = "\x1b[32m"
@@ -52,15 +52,14 @@ class PrettyLog(object):
         sys.stderr.flush()
         g = REDO_RE.match(s)
         if g:
-            # FIXME: support vars.DEBUG_PIDS somewhere
             all = g.group(0)
             self.file.write(s[:-len(all)])
             words = g.group(1).split(':')
             text = g.group(2)
             kind, pid, when = words[0:3]
+            pid = int(pid)
             if kind == 'unchanged':
-                if vars.DEBUG >= 1:
-                    self._pretty(pid, '', '%s (unchanged)' % text)
+                self._pretty(pid, '', '%s (unchanged)' % text)
             elif kind == 'check':
                 self._pretty(pid, GREEN, '(%s)' % text)
             elif kind == 'do':
@@ -102,7 +101,7 @@ _log = None
 def setup(file, pretty):
     global _log
     if pretty:
-        check_tty()
+        check_tty(file)
         _log = PrettyLog(file=file)
     else:
         _log = RawLog(file=file)
diff --git a/redo-ifchange.py b/redo-ifchange.py
index 72b84dd..86de027 100755
--- a/redo-ifchange.py
+++ b/redo-ifchange.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2
-import sys, os
+import sys, os, traceback
 
 import vars_init
 vars_init.init(sys.argv[1:])
@@ -20,7 +20,8 @@ def should_build(t):
 rv = 202
 try:
     if vars_init.is_toplevel:
-        builder.start_stdin_log_reader(status=True, details=True)
+        builder.start_stdin_log_reader(status=True, details=True,
+            debug_locks=False, debug_pids=False)
     if vars.TARGET and not vars.UNLOCKED:
         me = os.path.join(vars.STARTDIR, 
                           os.path.join(vars.PWD, vars.TARGET))
@@ -29,6 +30,7 @@ try:
     else:
         f = me = None
         debug2('redo-ifchange: not adding depends.\n')
+    jwack.setup(1)
     try:
         targets = sys.argv[1:]
         if f:
@@ -41,7 +43,12 @@ try:
         try:
             state.rollback()
         finally:
-            jwack.force_return_tokens()
+            try:
+                jwack.force_return_tokens()
+            except Exception, e:
+                traceback.print_exc(100, sys.stderr)
+                err('unexpected error: %r\n' % e)
+                rv = 1
 except KeyboardInterrupt:
     if vars_init.is_toplevel:
         builder.await_log_reader()
diff --git a/redo-log.py b/redo-log.py
index d5608ae..159d6ca 100755
--- a/redo-log.py
+++ b/redo-log.py
@@ -12,6 +12,8 @@ no-details      only show 'redo' recursion trace, not build output
 no-colorize     don't colorize 'redo' log messages
 no-status       don't display build summary line in --follow
 raw-logs        don't format logs, just send raw output straight to stdout
+debug-locks     print messages about file locking (useful for debugging)
+debug-pids      print process ids in log messages (useful for debugging)
 ack-fd=         (internal use only) print REDO-OK to this fd upon starting
 """
 o = options.Options(optspec)
@@ -75,6 +77,7 @@ def catlog(t):
     if t == '-':
         f = sys.stdin
         fid = None
+        loglock = None
         logname = None
     else:
         try:
@@ -86,6 +89,8 @@ def catlog(t):
         del sf
         state.rollback()
         logname = state.logname(fid)
+        loglock = state.Lock(fid + state.LOG_LOCK_MAGIC)
+        loglock.waitlock(shared=True)
         f = None
     delay = 0.01
     was_locked = is_locked(fid)
@@ -161,20 +166,31 @@ def catlog(t):
             kind, pid, when = words.split(':')[0:3]
             if kind == 'unchanged':
                 if opt.unchanged:
-                    if text not in already:
+                    if opt.debug_locks:
                         logs.write(line.rstrip())
+                    elif text not in already:
+                        logs.meta('do', text)
                     if opt.recursive:
+                        if loglock: loglock.unlock()
                         catlog(text)
-            elif kind in ('do', 'waiting'):
-                logs.write(line.rstrip())
+                        if loglock: loglock.waitlock(shared=True)
+            elif kind in ('do', 'waiting', 'locked', 'unlocked'):
+                if opt.debug_locks:
+                    logs.write(line.rstrip())
+                elif text not in already:
+                    logs.meta('do', text)
                 if opt.recursive:
                     assert text
+                    if loglock: loglock.unlock()
                     catlog(text)
+                    if loglock: loglock.waitlock(shared=True)
             else:
                 logs.write(line.rstrip())
         else:
             if opt.details:
                 logs.write(line.rstrip())
+    if loglock:
+        loglock.unlock()
     if status:
         sys.stdout.flush()
         sys.stderr.write('\r%-*.*s\r' % (width, width, ''))
@@ -197,6 +213,10 @@ try:
         logs.setup(file=sys.stdout, pretty=False)
     else:
         logs.setup(file=sys.stdout, pretty=True)
+    if opt.debug_locks:
+        vars.DEBUG_LOCKS = 1
+    if opt.debug_pids:
+        vars.DEBUG_PIDS = 1
     if opt.ack_fd:
         # Write back to owner, to let them know we started up okay and
         # will be able to see their error output, so it's okay to close
diff --git a/redo.py b/redo.py
index 4dc4780..cc6124a 100755
--- a/redo.py
+++ b/redo.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python2
-import sys, os
+import sys, os, traceback
 import options
 from helpers import atoi
 
@@ -54,7 +54,8 @@ from logs import warn, err
 
 try:
     if vars_init.is_toplevel:
-        builder.start_stdin_log_reader(status=opt.status, details=opt.details)
+        builder.start_stdin_log_reader(status=opt.status, details=opt.details,
+            debug_locks=opt.debug_locks, debug_pids=opt.debug_pids)
     for t in targets:
         if os.path.exists(t):
             f = state.File(name=t)
@@ -75,7 +76,12 @@ try:
         try:
             state.rollback()
         finally:
-            jwack.force_return_tokens()
+            try:
+                jwack.force_return_tokens()
+            except Exception, e:
+                traceback.print_exc(100, sys.stderr)
+                err('unexpected error: %r\n' % e)
+                retcode = 1
     if vars_init.is_toplevel:
         builder.await_log_reader()
     sys.exit(retcode)
diff --git a/state.py b/state.py
index af484db..4556e68 100644
--- a/state.py
+++ b/state.py
@@ -21,6 +21,8 @@ ALWAYS='//ALWAYS'   # an invalid filename that is always marked as dirty
 STAMP_DIR='dir'     # the stamp of a directory; mtime is unhelpful
 STAMP_MISSING='0'   # the stamp of a nonexistent file
 
+LOG_LOCK_MAGIC=0x10000000  # fid offset for "log locks"
+
 
 class CyclicDependencyError(Exception): pass
 
@@ -374,9 +376,11 @@ class Lock:
             self.owned = True
         return self.owned
 
-    def waitlock(self):
+    def waitlock(self, shared=False):
         self.check()
-        fcntl.lockf(self.lockfile, fcntl.LOCK_EX, 0, 0)
+        fcntl.lockf(self.lockfile,
+            fcntl.LOCK_SH if shared else fcntl.LOCK_EX,
+            0, 0)
         self.owned = True
             
     def unlock(self):