redo-log: prioritize the "foreground" process.

When running a parallel build, redo-log -f (which is auto-started by redo) tries to traverse through the logs depth first, in the order parent processes started subprocesses. This works pretty well, but if a process's dependencies are locked, it might have to give up its jobserver token while other processes build those dependencies. After the dependencies finish, the parent might not be able to get a token for quite some time, and the logs will appear to stop. To prevent this from happening, we can instantiate up to one "cheater" token, only in the foreground process (the one locked by redo-log -f), which will allow it to continue running, albeit a bit slowly (since it only has one token out of possibly many). When the process finishes, we then destroy the fake token. It gets a little complicated; see explanation at the top of jwack.py.
2018-11-17 04:32:09 -05:00 · 2018-11-17 04:32:09 -05:00 · 8b5a567b2e
commit 8b5a567b2e
parent e10bd1a9eb
7 changed files with 348 additions and 104 deletions
--- a/builder.py
+++ b/builder.py
@ -22,7 +22,7 @@ def _try_stat(filename):
 log_reader_pid = None


-def start_stdin_log_reader(status, details):
+def start_stdin_log_reader(status, details, debug_locks, debug_pids):
    if vars.RAW_LOGS: return
    global log_reader_pid
    r, w = os.pipe()    # main pipe to redo-log
@ -47,7 +47,7 @@ def start_stdin_log_reader(status, details):
        os.dup2(w, 1)
        os.dup2(w, 2)
        os.close(w)
-        check_tty()
+        check_tty(sys.stderr)
    else:
        # child
        try:
@ -61,6 +61,8 @@ def start_stdin_log_reader(status, details):
                '--ack-fd', str(aw),
                ('--status' if status and os.isatty(2) else '--no-status'),
                ('--details' if details else '--no-details'),
+                ('--debug-locks' if debug_locks else '--no-debug-locks'),
+                ('--debug-pids' if debug_pids else '--no-debug-pids'),
                '-'
            ]
            os.execvp(argv[0], argv)
@ -182,7 +184,6 @@ class BuildJob:
                ]
        if vars.VERBOSE: argv[1] += 'v'
        if vars.XTRACE: argv[1] += 'x'
-        if vars.VERBOSE or vars.XTRACE: logs.write('\n')
        firstline = open(os.path.join(dodir, dofile)).readline().strip()
        if firstline.startswith('#!/'):
            argv[0:2] = firstline[2:].split(' ')
@ -252,7 +253,7 @@ class BuildJob:
            logf.close()
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # python ignores SIGPIPE
        if vars.VERBOSE or vars.XTRACE:
-            logs.write('* %s\n' % ' '.join(self.argv).replace('\n', ' '))
+            logs.write('* %s' % ' '.join(self.argv).replace('\n', ' '))
        os.execvp(self.argv[0], self.argv)
        # FIXME: it would be nice to log the exit code to logf.
        #  But that would have to happen in the parent process, which doesn't
@ -353,6 +354,27 @@ def main(targets, shouldbuildfunc):
    def done(t, rv):
        if rv:
            retcode[0] = 1
+    
+    if vars.TARGET and not vars.UNLOCKED:
+        me = os.path.join(vars.STARTDIR, 
+                          os.path.join(vars.PWD, vars.TARGET))
+        myfile = state.File(name=me)
+        selflock = state.Lock(state.LOG_LOCK_MAGIC + myfile.id)
+    else:
+        selflock = myfile = me = None
+    
+    def cheat():
+        if not selflock: return 0
+        selflock.trylock()
+        if not selflock.owned:
+            # redo-log already owns it: let's cheat.
+            # Give ourselves one extra token so that the "foreground" log
+            # can always make progress.
+            return 1
+        else:
+            # redo-log isn't watching us (yet)
+            selflock.unlock()
+            return 0

    # In the first cycle, we just build as much as we can without worrying
    # about any lock contention.  If someone else has it locked, we move on.
@ -369,7 +391,7 @@ def main(targets, shouldbuildfunc):
        seen[t] = 1
        if not jwack.has_token():
            state.commit()
-        jwack.get_token(t)
+        jwack.ensure_token_or_cheat(t, cheat)
        if retcode[0] and not vars.KEEP_GOING:
            break
        if not state.check_sane():
@ -409,6 +431,8 @@ def main(targets, shouldbuildfunc):
    while locked or jwack.running():
        state.commit()
        jwack.wait_all()
+        assert jwack._mytokens == 0
+        jwack.ensure_token_or_cheat('self', cheat)
        # at this point, we don't have any children holding any tokens, so
        # it's okay to block below.
        if retcode[0] and not vars.KEEP_GOING:
@ -427,6 +451,8 @@ def main(targets, shouldbuildfunc):
                import random
                time.sleep(random.random() * min(backoff, 1.0))
                backoff *= 2
+                # after printing this line, redo-log will recurse into t,
+                # whether it's us building it, or someone else.
                meta('waiting', _nice(t))
                try:
                    lock.check()
@ -436,12 +462,14 @@ def main(targets, shouldbuildfunc):
                    return retcode[0]
                # this sequence looks a little silly, but the idea is to
                # give up our personal token while we wait for the lock to
-                # be released; but we should never run get_token() while
+                # be released; but we should never run ensure_token() while
                # holding a lock, or we could cause deadlocks.
                jwack.release_mine()
                lock.waitlock()
+                # now t is definitely free, so we get to decide whether
+                # to build it.
                lock.unlock()
-                jwack.get_token(t)
+                jwack.ensure_token_or_cheat(t, cheat)
                lock.trylock()
            assert(lock.owned)
            meta('unlocked', _nice(t))