The second half of redo-stamp: out-of-order building.

If a depends on b, which depends on c, and c is dirty but b uses redo-stamp checksums, then 'redo-ifchange a' is indeterminate: we won't know if we need to run a.do unless we first build b, but the script that *normally* runs 'redo-ifchange b' is a.do, and we don't want to run that yet, because we don't know for sure if b is dirty, and we shouldn't build a unless one of its dependencies is dirty. Eek!

Luckily, there's a safe solution. If we *know* a is dirty - e.g. because a.do or one of its children has definitely changed - then we can just run a.do immediately and there's no problem, even if b is indeterminate, because we were going to run a.do anyhow.

If a's dependencies are *not* definitely dirty, and all we have is indeterminate ones like b, then that means a's build process *hasn't changed*, which means its tree of dependencies still includes b, which means we can deduce that if we *did* run a.do, it would end up running b.do. Since we know that anyhow, we can safely just run b.do, which will call either b.set_checked() or b.set_changed(). Once that's done, we can re-parse a's dependencies and this time conclusively tell if it needs to be redone or not. Even if it does, b is already up-to-date, so the 'redo-ifchange b' line in a.do will be fast.

...now take all the above and do it recursively to handle nested dependencies, etc, and you're done.
2010-12-11 04:40:05 -08:00 · 2010-12-11 04:40:05 -08:00 · f702417ef3
commit f702417ef3
parent 1355ade7c7
6 changed files with 124 additions and 26 deletions
--- a/builder.py
+++ b/builder.py
@ -65,14 +65,23 @@ class BuildJob:
    def start(self):
        # Entry point for this job: ask shouldbuildfunc whether the target
        # needs work and dispatch accordingly.  The caller must already hold
        # self.lock.
        assert(self.lock.owned)
        t = self.t
        sf = self.sf
        try:
-            if not self.shouldbuildfunc(t):
+            dirty = self.shouldbuildfunc(self.t)
            if not dirty:
                # target doesn't need to be built; skip the whole task
                return self._after2(0)
        except ImmediateReturn, e:
            # the check itself asked us to stop; report its return value
            return self._after2(e.rv)
        # Any value that compares equal to True (the boolean, or the integer
        # 1) means "definitely dirty": run the .do script.  Any other truthy
        # value is handed to _start_oob, which iterates over it and reads
        # .name from each element.
        # NOTE(review): presumably that other value is the list of uncertain
        # targets returned by redo-ifchange's dirty_deps -- confirm.
        if dirty == True:
            self._start_do()
        else:
            self._start_oob(dirty)
    def _start_do(self):
        assert(self.lock.owned)
        t = self.t
        sf = self.sf
        newstamp = sf.read_stamp()
        if (sf.is_generated and
            not sf.failed_runid and
@ -132,6 +141,27 @@ class BuildJob:
        state.commit()
        jwack.start_job(t, self._do_subproc, self._after)
    def _start_oob(self, dirty):
        # out-of-band redo of some sub-objects.  This happens when we're not
        # quite sure if t needs to be built or not (because some children look
        # dirty, but might turn out to be clean thanks to checksums).  We have
        # to call redo-oob to figure it all out.
        #
        # Note: redo-oob will handle all the updating of sf, so we don't have
        # to do it here, nor call _after1.
        #
        # dirty: iterable of objects with a .name attribute; each name is
        # passed to redo-oob after the name of our own target.
        argv = ['redo-oob', self.sf.name] + [d.name for d in dirty]
        log('(%s)\n' % _nice(self.t))
        state.commit()
        def run():
            # Job body handed to jwack.start_job: switch to vars.BASE, extend
            # REDO_DEPTH by two spaces, then replace this process with
            # redo-oob.
            # NOTE(review): presumably jwack runs this in a forked child, so
            # the chdir/environ changes don't affect the parent -- confirm.
            os.chdir(vars.BASE)
            os.environ['REDO_DEPTH'] = vars.DEPTH + '  '
            os.execvp(argv[0], argv)
            assert(0)
            # returns only if there's an exception
        def after(t, rv):
            # completion callback: t is unused, rv is forwarded to _after2
            return self._after2(rv)
        jwack.start_job(self.t, run, after)
    def _do_subproc(self):
        # careful: REDO_PWD was the PWD relative to the STARTPATH at the time
        # we *started* building the current target; but that target ran
@ -250,7 +280,10 @@ def main(targets, shouldbuildfunc):
            break
        f = state.File(name=t)
        lock = state.Lock(f.id)
-        lock.trylock()
+        if vars.UNLOCKED:
            lock.owned = True
        else:
            lock.trylock()
        if not lock.owned:
            if vars.DEBUG_LOCKS:
                log('%s (locked...)\n' % _nice(t))
--- a/redo-ifchange.py
+++ b/redo-ifchange.py
@ -6,46 +6,82 @@ from helpers import debug, debug2, err, unlink
 def _nice(t):
    # Join t onto vars.BASE and hand the result to state.relpath together
    # with vars.STARTDIR.
    # NOTE(review): presumably this yields t expressed relative to the
    # directory redo was started from, for use in log output -- confirm
    # against state.relpath.
    return state.relpath(os.path.join(vars.BASE, t), vars.STARTDIR)
 # The two definite results of dirty_deps().  CLEAN is falsy and DIRTY is
 # truthy, so callers can still test the result with a plain 'if'.
 CLEAN = 0
 DIRTY = 1
 def dirty_deps(f, depth, max_changed):
    # Decide whether f needs to be rebuilt.
    #
    # f:           the file object to examine; its recorded deps are walked
    #              recursively.
    # depth:       string prefixed to every debug line; grows by two spaces
    #              per level of recursion.
    # max_changed: runid to compare against; f counts as dirty if its
    #              changed_runid is greater than this.
    #
    # Returns one of:
    #   CLEAN   - f is up to date.  Unless f was already checked, it is also
    #             marked as checked and saved before returning.
    #   DIRTY   - f definitely needs to be rebuilt.
    #   [files] - a non-empty list of file objects that must be built first
    #             before we can tell whether f is dirty.
    if vars.DEBUG >= 1:
        debug('%s?%s\n' % (depth, _nice(f.name)))
    if f.failed_runid:
        debug('%s-- DIRTY (failed last time)\n' % depth)
-        return True
+        return DIRTY
    if f.changed_runid == None:
        debug('%s-- DIRTY (never built)\n' % depth)
-        return True
+        return DIRTY
    if f.changed_runid > max_changed:
        debug('%s-- DIRTY (built)\n' % depth)
-        return True  # has been built more recently than parent
+        return DIRTY  # has been built more recently than parent
    if f.is_checked():
-        if vars.DEBUG >= 1: debug('%s-- CLEAN (checked)\n' % depth)
+        if vars.DEBUG >= 1:
-        return False  # has already been checked during this session
+            debug('%s-- CLEAN (checked)\n' % depth)
-
+        return CLEAN  # has already been checked during this session
    if not f.stamp:
        debug('%s-- DIRTY (no stamp)\n' % depth)
-        return True
+        return DIRTY
    if f.stamp != f.read_stamp():
        debug('%s-- DIRTY (mtime)\n' % depth)
-        return True
+        return DIRTY
-    
+
    # uncertain targets collected from children; only filled in when f
    # itself has a checksum (f.csum)
    must_build = []
    for mode,f2 in f.deps():
        dirty = CLEAN
        if mode == 'c':
            # mode 'c': f2 counts as dirty as soon as its path exists under
            # vars.BASE
            if os.path.exists(os.path.join(vars.BASE, f2.name)):
                debug('%s-- DIRTY (created)\n' % depth)
-                return True
+                dirty = DIRTY
        elif mode == 'm':
            # mode 'm': recurse into f2, comparing against the later of f's
            # changed and checked runids
-            if dirty_deps(f2, depth = depth + '  ',
+            sub = dirty_deps(f2, depth = depth + '  ',
-                          max_changed = max(f.changed_runid, f.checked_runid)):
+                             max_changed = max(f.changed_runid,
                                               f.checked_runid))
            if sub:
                debug('%s-- DIRTY (sub)\n' % depth)
-                return True
+                dirty = sub
        else:
            assert(mode in ('c','m'))
        if not f.csum:
            # f is a "normal" target: dirty f2 means f is instantly dirty
            if dirty:
                # if dirty==DIRTY, this means f is definitely dirty.
                # if dirty==[...], it's a list of the uncertain children.
                return dirty
        else:
            # f is "checksummable": dirty f2 means f needs to redo,
            # but f might turn out to be clean after that (ie. our parent
            # might not be dirty).
            if dirty == DIRTY:
                # f2 is definitely dirty, so f definitely needs to
                # redo.  However, after that, f might turn out to be
                # unchanged.
                return [f]
            elif isinstance(dirty,list):
                # our child f2 might be dirty, but it's not sure yet.  It's
                # given us a list of targets we have to redo in order to
                # be sure.
                must_build += dirty
    if must_build:
        # f is *maybe* dirty because at least one of its children is maybe
        # dirty.  must_build has accumulated a list of "topmost" uncertain
        # objects in the tree.  If we build all those, we can then
        # redo-ifchange f and it won't have any uncertainty next time.
        return must_build
    # if we get here, it's because the target is clean
    if f.is_override:
        builder.warn_override(f.name)
    f.set_checked()
    f.save()
-    return False
+    return CLEAN
 def should_build(t):
@ -63,9 +99,10 @@ try:
    debug2('TARGET: %r %r %r\n' % (vars.STARTDIR, vars.PWD, vars.TARGET))
    try:
        targets = sys.argv[1:]
-        for t in targets:
+        if not vars.UNLOCKED:
-            f.add_dep('m', t)
+            for t in targets:
-        f.save()
+                f.add_dep('m', t)
            f.save()
        rv = builder.main(targets, should_build)
    finally:
        jwack.force_return_tokens()
--- a/1
+++ b/1
@ -0,0 +1 @@
 redo-oob.py
--- a/redo-oob.py
+++ b/redo-oob.py
@ -0,0 +1,24 @@
 #!/usr/bin/python
 import sys, os
 import state
 from helpers import err
 # Usage: redo-oob <target> <dep> [<dep>...]
 #
 # First runs 'redo' on every listed dep, then runs 'redo-ifchange' on the
 # target with REDO_UNLOCKED set.  Exits with the first non-zero status
 # returned by either child; otherwise falls off the end of the script.
 if len(sys.argv[1:]) < 2:
    err('%s: at least 2 arguments expected.\n' % sys.argv[0])
    sys.exit(1)
 target = sys.argv[1]
 deps = sys.argv[2:]
 # NOTE(review): 'me' is never read below; constructing state.File may have
 # side effects of its own -- confirm before removing.
 me = state.File(name=target)
 # step 1: redo the listed deps and wait for the child to finish
 argv = ['redo'] + deps
 rv = os.spawnvp(os.P_WAIT, argv[0], argv)
 if rv:
    sys.exit(rv)
 # step 2: re-check the target itself.  REDO_UNLOCKED is set only now, after
 # the 'redo' child has finished, so only the redo-ifchange child sees it.
 os.environ['REDO_UNLOCKED'] = '1'
 argv = ['redo-ifchange', target]
 rv = os.spawnvp(os.P_WAIT, argv[0], argv)
 if rv:
    sys.exit(rv)
--- a/t/stamp/stamptest.do
+++ b/t/stamp/stamptest.do
@ -22,19 +22,19 @@ redo-ifchange usestamp
 redo bob
 redo-ifchange usestamp
 [ "$(wc -l <stampy.log)" -eq 3 ] || exit 43
-[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 44
+[ "$(wc -l <usestamp.log)" -eq 1 ] || exit 44
 ../flush-cache.sh
 redo-ifchange usestamp
 [ "$(wc -l <stampy.log)" -eq 3 ] || exit 45
-[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 46
+[ "$(wc -l <usestamp.log)" -eq 1 ] || exit 46
 ../flush-cache.sh
 echo two >inp
 redo stampy
 [ "$(wc -l <stampy.log)" -eq 4 ] || exit 51
-[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 52
+[ "$(wc -l <usestamp.log)" -eq 1 ] || exit 52
 redo-ifchange usestamp
 [ "$(wc -l <stampy.log)" -eq 4 ] || exit 61
-[ "$(wc -l <usestamp.log)" -eq 3 ] || exit 62
+[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 62
--- a/vars.py
+++ b/vars.py
@ -22,3 +22,6 @@ RUNID = atoi.atoi(os.environ.get('REDO_RUNID')) or None
 # REDO_BASE must be present in the environment (KeyError otherwise); any
 # trailing slashes are stripped from it.
 BASE = os.environ['REDO_BASE']
 while BASE and BASE.endswith('/'):
    BASE = BASE[:-1]
 # UNLOCKED is 1 when REDO_UNLOCKED is set to a non-empty string, else 0.
 UNLOCKED = os.environ.get('REDO_UNLOCKED', '') and 1 or 0
 # Blank the variable right after reading it, so processes started from here
 # see an empty value.
 os.environ['REDO_UNLOCKED'] = ''  # not inheritable by subprocesses