The second half of redo-stamp: out-of-order building.

If a depends on b, which depends on c, and c is dirty but b uses redo-stamp
checksums, then 'redo-ifchange a' is indeterminate: we won't know if we need
to run a.do unless we first build b, but the script that *normally* runs
'redo-ifchange b' is a.do, and we don't want to run that yet, because we
don't know for sure if b is dirty, and we shouldn't build a unless one of
its dependencies is dirty.  Eek!

Luckily, there's a safe solution.  If we *know* a is dirty - e.g. because
a.do or one of its children has definitely changed - then we can just run
a.do immediately and there's no problem, even if b is indeterminate, because
we were going to run a.do anyhow.

If a's dependencies are *not* definitely dirty, and all we have is
indeterminate ones like b, then that means a's build process *hasn't
changed*, which means its tree of dependencies still includes b, which means
we can deduce that if we *did* run a.do, it would end up running b.do.

Since we know that anyhow, we can safely just run b.do, which will call either
b.set_checked() or b.set_changed().  Once that's done, we can re-parse a's
dependencies and this time conclusively tell if it needs to be redone or
not.  Even if it does, b is already up-to-date, so the 'redo-ifchange b'
line in a.do will be fast.

...now take all the above and do it recursively to handle nested
dependencies, etc, and you're done.
This commit is contained in:
Avery Pennarun 2010-12-11 04:40:05 -08:00
commit f702417ef3
6 changed files with 124 additions and 26 deletions

View file

@ -65,14 +65,23 @@ class BuildJob:
def start(self): def start(self):
assert(self.lock.owned) assert(self.lock.owned)
t = self.t
sf = self.sf
try: try:
if not self.shouldbuildfunc(t): dirty = self.shouldbuildfunc(self.t)
if not dirty:
# target doesn't need to be built; skip the whole task # target doesn't need to be built; skip the whole task
return self._after2(0) return self._after2(0)
except ImmediateReturn, e: except ImmediateReturn, e:
return self._after2(e.rv) return self._after2(e.rv)
if dirty == True:
self._start_do()
else:
self._start_oob(dirty)
def _start_do(self):
assert(self.lock.owned)
t = self.t
sf = self.sf
newstamp = sf.read_stamp() newstamp = sf.read_stamp()
if (sf.is_generated and if (sf.is_generated and
not sf.failed_runid and not sf.failed_runid and
@ -132,6 +141,27 @@ class BuildJob:
state.commit() state.commit()
jwack.start_job(t, self._do_subproc, self._after) jwack.start_job(t, self._do_subproc, self._after)
def _start_oob(self, dirty):
    # Out-of-band redo of some sub-objects.
    #
    # We get here when we cannot yet tell whether self.t needs building:
    # some children look dirty, but might turn out to be clean thanks to
    # checksums.  'dirty' is the list of those uncertain targets (each one
    # must have a .name); redo-oob is handed our own target name followed
    # by theirs and sorts the whole thing out.
    #
    # Note: redo-oob will handle all the updating of sf, so we don't have
    # to do it here, nor call _after1.
    oob_cmd = ['redo-oob', self.sf.name]
    oob_cmd.extend(d.name for d in dirty)
    log('(%s)\n' % _nice(self.t))
    state.commit()

    def exec_oob():
        # Job body: replace the job process with redo-oob, running from
        # the base directory and one nesting level deeper.
        os.chdir(vars.BASE)
        # NOTE(review): confirm the width of this depth increment against
        # the repository; whitespace may have been collapsed in this view.
        os.environ['REDO_DEPTH'] = vars.DEPTH + ' '
        os.execvp(oob_cmd[0], oob_cmd)
        # execvp comes back only by raising an exception, so reaching
        # this line would be a bug.
        assert(0)

    def finished(t, rv):
        # Completion callback: pass the exit status straight to _after2.
        return self._after2(rv)

    jwack.start_job(self.t, exec_oob, finished)
def _do_subproc(self): def _do_subproc(self):
# careful: REDO_PWD was the PWD relative to the STARTPATH at the time # careful: REDO_PWD was the PWD relative to the STARTPATH at the time
# we *started* building the current target; but that target ran # we *started* building the current target; but that target ran
@ -250,7 +280,10 @@ def main(targets, shouldbuildfunc):
break break
f = state.File(name=t) f = state.File(name=t)
lock = state.Lock(f.id) lock = state.Lock(f.id)
lock.trylock() if vars.UNLOCKED:
lock.owned = True
else:
lock.trylock()
if not lock.owned: if not lock.owned:
if vars.DEBUG_LOCKS: if vars.DEBUG_LOCKS:
log('%s (locked...)\n' % _nice(t)) log('%s (locked...)\n' % _nice(t))

View file

@ -6,46 +6,82 @@ from helpers import debug, debug2, err, unlink
def _nice(t): def _nice(t):
return state.relpath(os.path.join(vars.BASE, t), vars.STARTDIR) return state.relpath(os.path.join(vars.BASE, t), vars.STARTDIR)
CLEAN = 0
DIRTY = 1
def dirty_deps(f, depth, max_changed): def dirty_deps(f, depth, max_changed):
if vars.DEBUG >= 1: if vars.DEBUG >= 1:
debug('%s?%s\n' % (depth, _nice(f.name))) debug('%s?%s\n' % (depth, _nice(f.name)))
if f.failed_runid: if f.failed_runid:
debug('%s-- DIRTY (failed last time)\n' % depth) debug('%s-- DIRTY (failed last time)\n' % depth)
return True return DIRTY
if f.changed_runid == None: if f.changed_runid == None:
debug('%s-- DIRTY (never built)\n' % depth) debug('%s-- DIRTY (never built)\n' % depth)
return True return DIRTY
if f.changed_runid > max_changed: if f.changed_runid > max_changed:
debug('%s-- DIRTY (built)\n' % depth) debug('%s-- DIRTY (built)\n' % depth)
return True # has been built more recently than parent return DIRTY # has been built more recently than parent
if f.is_checked(): if f.is_checked():
if vars.DEBUG >= 1: debug('%s-- CLEAN (checked)\n' % depth) if vars.DEBUG >= 1:
return False # has already been checked during this session debug('%s-- CLEAN (checked)\n' % depth)
return CLEAN # has already been checked during this session
if not f.stamp: if not f.stamp:
debug('%s-- DIRTY (no stamp)\n' % depth) debug('%s-- DIRTY (no stamp)\n' % depth)
return True return DIRTY
if f.stamp != f.read_stamp(): if f.stamp != f.read_stamp():
debug('%s-- DIRTY (mtime)\n' % depth) debug('%s-- DIRTY (mtime)\n' % depth)
return True return DIRTY
must_build = []
for mode,f2 in f.deps(): for mode,f2 in f.deps():
dirty = CLEAN
if mode == 'c': if mode == 'c':
if os.path.exists(os.path.join(vars.BASE, f2.name)): if os.path.exists(os.path.join(vars.BASE, f2.name)):
debug('%s-- DIRTY (created)\n' % depth) debug('%s-- DIRTY (created)\n' % depth)
return True dirty = DIRTY
elif mode == 'm': elif mode == 'm':
if dirty_deps(f2, depth = depth + ' ', sub = dirty_deps(f2, depth = depth + ' ',
max_changed = max(f.changed_runid, f.checked_runid)): max_changed = max(f.changed_runid,
f.checked_runid))
if sub:
debug('%s-- DIRTY (sub)\n' % depth) debug('%s-- DIRTY (sub)\n' % depth)
return True dirty = sub
else:
assert(mode in ('c','m'))
if not f.csum:
# f is a "normal" target: dirty f2 means f is instantly dirty
if dirty:
# if dirty==DIRTY, this means f is definitely dirty.
# if dirty==[...], it's a list of the uncertain children.
return dirty
else:
# f is "checksummable": dirty f2 means f needs to redo,
# but f might turn out to be clean after that (ie. our parent
# might not be dirty).
if dirty == DIRTY:
# f2 is definitely dirty, so f definitely needs to
# redo. However, after that, f might turn out to be
# unchanged.
return [f]
elif isinstance(dirty,list):
# our child f2 might be dirty, but it's not sure yet. It's
# given us a list of targets we have to redo in order to
# be sure.
must_build += dirty
if must_build:
# f is *maybe* dirty because at least one of its children is maybe
# dirty. must_build has accumulated a list of "topmost" uncertain
# objects in the tree. If we build all those, we can then
# redo-ifchange f and it won't have any uncertainty next time.
return must_build
# if we get here, it's because the target is clean
if f.is_override: if f.is_override:
builder.warn_override(f.name) builder.warn_override(f.name)
f.set_checked() f.set_checked()
f.save() f.save()
return False return CLEAN
def should_build(t): def should_build(t):
@ -63,9 +99,10 @@ try:
debug2('TARGET: %r %r %r\n' % (vars.STARTDIR, vars.PWD, vars.TARGET)) debug2('TARGET: %r %r %r\n' % (vars.STARTDIR, vars.PWD, vars.TARGET))
try: try:
targets = sys.argv[1:] targets = sys.argv[1:]
for t in targets: if not vars.UNLOCKED:
f.add_dep('m', t) for t in targets:
f.save() f.add_dep('m', t)
f.save()
rv = builder.main(targets, should_build) rv = builder.main(targets, should_build)
finally: finally:
jwack.force_return_tokens() jwack.force_return_tokens()

1
redo-oob Symbolic link
View file

@ -0,0 +1 @@
redo-oob.py

24
redo-oob.py Executable file
View file

@ -0,0 +1,24 @@
#!/usr/bin/python
import sys, os
import state
from helpers import err
# Usage: redo-oob <target> <dep> [<dep>...]
#
# First rebuild every listed dep with 'redo', then re-run
# 'redo-ifchange <target>' with REDO_UNLOCKED set.  Any failing step ends
# the script with that step's status.


def _run_or_exit(cmd):
    # Run cmd (looked up on PATH), wait for it to finish, and exit with its
    # status if that status is nonzero.
    status = os.spawnvp(os.P_WAIT, cmd[0], cmd)
    if status:
        sys.exit(status)


args = sys.argv[1:]
if len(args) < 2:
    err('%s: at least 2 arguments expected.\n' % sys.argv[0])
    sys.exit(1)

target, deps = args[0], args[1:]

# Not referenced again below.
# NOTE(review): presumably constructing the File has an effect on the state
# database - confirm before removing.
me = state.File(name=target)

# Step 1: unconditionally redo the uncertain dependencies.
_run_or_exit(['redo'] + deps)

# Step 2: re-check the target itself.  REDO_UNLOCKED is set only now, after
# the plain 'redo' run above, so only the redo-ifchange child sees it.
# NOTE(review): presumably needed because the process that launched redo-oob
# still holds the target's lock - confirm against the builder.
os.environ['REDO_UNLOCKED'] = '1'
_run_or_exit(['redo-ifchange', target])

View file

@ -22,19 +22,19 @@ redo-ifchange usestamp
redo bob redo bob
redo-ifchange usestamp redo-ifchange usestamp
[ "$(wc -l <stampy.log)" -eq 3 ] || exit 43 [ "$(wc -l <stampy.log)" -eq 3 ] || exit 43
[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 44 [ "$(wc -l <usestamp.log)" -eq 1 ] || exit 44
../flush-cache.sh ../flush-cache.sh
redo-ifchange usestamp redo-ifchange usestamp
[ "$(wc -l <stampy.log)" -eq 3 ] || exit 45 [ "$(wc -l <stampy.log)" -eq 3 ] || exit 45
[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 46 [ "$(wc -l <usestamp.log)" -eq 1 ] || exit 46
../flush-cache.sh ../flush-cache.sh
echo two >inp echo two >inp
redo stampy redo stampy
[ "$(wc -l <stampy.log)" -eq 4 ] || exit 51 [ "$(wc -l <stampy.log)" -eq 4 ] || exit 51
[ "$(wc -l <usestamp.log)" -eq 2 ] || exit 52 [ "$(wc -l <usestamp.log)" -eq 1 ] || exit 52
redo-ifchange usestamp redo-ifchange usestamp
[ "$(wc -l <stampy.log)" -eq 4 ] || exit 61 [ "$(wc -l <stampy.log)" -eq 4 ] || exit 61
[ "$(wc -l <usestamp.log)" -eq 3 ] || exit 62 [ "$(wc -l <usestamp.log)" -eq 2 ] || exit 62

View file

@ -22,3 +22,6 @@ RUNID = atoi.atoi(os.environ.get('REDO_RUNID')) or None
BASE = os.environ['REDO_BASE'] BASE = os.environ['REDO_BASE']
while BASE and BASE.endswith('/'): while BASE and BASE.endswith('/'):
BASE = BASE[:-1] BASE = BASE[:-1]
UNLOCKED = os.environ.get('REDO_UNLOCKED', '') and 1 or 0
os.environ['REDO_UNLOCKED'] = '' # not inheritable by subprocesses