From 672b748394d8d0bdcc5362a5d466c8a4b98115cb Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 21 Nov 2018 07:19:20 -0500 Subject: [PATCH] Further improve handling of symlink targets/deps. In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to avoid false positives in the "manual override" detector: a .do file that generates $3 as a symlink would trigger manual override if the *target* of that symlink ever changed, which is incorrect. Unfortunately using os.lstat() leads to a different problem: if X depends on Y and Y is a symlink to Z, then X would not be rebuilt when Z changes, which is clearly wrong. The fix is twofold: 1. read_stamp() should change on changes to both the link itself, *and* the target of the link. 2. We shouldn't mark a target as overridden under so many situations. We'll use *only* the primary mtime of the os.lstat(), not all the other bits in the stamp. Step 2 fixes a few other false positives also. For example, if you 'cp -a' a whole tree to another location, the st_ino of all the targets will change, which would trigger a mass of "manual override" warnings. Although a change in inode is sufficient to count an input as having changed (just to be extra safe), it should *not* be considered a manual override. Now we can distinguish between the two. Because the stamp format has changed, update the SCHEMA_VER field. I should have done this every other time I changed the stamp format, but I forgot. Sorry. That leads to spurious "manually modified" warnings after upgrading redo. --- builder.py | 5 ++- redo-ifchange.py | 2 +- state.py | 59 ++++++++++++++++++++++++------ t/101-atime/tick | 9 +++-- t/360-symlinks/.gitignore | 3 ++ t/360-symlinks/a.do | 11 ++++-- t/360-symlinks/all.do | 77 +++++++++++++++++++++++++++++---------- t/360-symlinks/b.do | 4 +- t/360-symlinks/clean.do | 3 +- 9 files changed, 129 insertions(+), 44 deletions(-) diff --git a/builder.py b/builder.py index 3263bdf..8662d80 100644 --- a/builder.py +++ b/builder.py @@ -147,7 +147,7 @@ class BuildJob: newstamp = sf.read_stamp() if (sf.is_generated and newstamp != state.STAMP_MISSING and - (sf.stamp != newstamp or sf.is_override)): + (sf.is_override or state.detect_override(sf.stamp, newstamp))): state.warn_override(_nice(t)) if not sf.is_override: warn('%s - old: %r\n' % (_nice(t), sf.stamp)) @@ -345,7 +345,8 @@ class BuildJob: sf.is_override = False if sf.is_checked() or sf.is_changed(): # it got checked during the run; someone ran redo-stamp. - # update_stamp would call set_changed(); we don't want that + # update_stamp would call set_changed(); we don't want that, + # so only use read_stamp. sf.stamp = sf.read_stamp() else: sf.csum = None diff --git a/redo-ifchange.py b/redo-ifchange.py index aae4212..d4d7cb7 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -1,5 +1,5 @@ #!/usr/bin/env python2 -import sys, os, traceback +import os, sys, traceback import vars_init vars_init.init(sys.argv[1:]) diff --git a/state.py b/state.py index 9799748..72e76bd 100644 --- a/state.py +++ b/state.py @@ -14,7 +14,7 @@ else: cmdline[0] = os.path.splitext(os.path.basename(cmdline[0]))[0] setproctitle(" ".join(cmdline)) -SCHEMA_VER=1 +SCHEMA_VER=2 TIMEOUT=60 ALWAYS='//ALWAYS' # an invalid filename that is always marked as dirty @@ -60,10 +60,12 @@ def db(): row = None ver = row and row[0] or None if ver != SCHEMA_VER: - err("state database: discarding v%s (wanted v%s)\n" - % (ver, SCHEMA_VER)) - must_create = True - _db = None + # Don't use err() here because this might happen before + # redo-log spawns. + sys.stderr.write('redo: %s: found v%s (expected v%s)\n' + % (dbfile, ver, SCHEMA_VER)) + sys.stderr.write('redo: manually delete .redo dir to start over.\n') + sys.exit(1) if must_create: unlink(dbfile) _db = _connect(dbfile) @@ -180,6 +182,24 @@ def target_relpath(t): return relpath(t, target_dir) +def detect_override(stamp1, stamp2): + """Determine if two stamps differ in a way that means manual override. + + When two stamps differ at all, that means the source is dirty and so we + need to rebuild. If they differ in mtime or size, then someone has surely + edited the file, and we don't want to trample their changes. + + But if the only difference is something else (like ownership, st_mode, + etc) then that might be a false positive; it's annoying to mark as + overridden in that case, so we return False. (It's still dirty though!) + """ + if stamp1 == stamp2: + return False + crit1 = stamp1.split('-', 2)[0:2] + crit2 = stamp2.split('-', 2)[0:2] + return crit1 != crit2 + + def warn_override(name): warn('%s - you modified it; skipping\n' % name) @@ -321,17 +341,34 @@ class File(object): " (target, mode, source, delete_me) values (?,?,?,?)", [self.id, mode, src.id, False]) - def read_stamp(self): + def _read_stamp_st(self, statfunc): try: - st = os.lstat(os.path.join(vars.BASE, self.name)) + st = statfunc(os.path.join(vars.BASE, self.name)) except OSError: - return STAMP_MISSING + return False, STAMP_MISSING if stat.S_ISDIR(st.st_mode): - return STAMP_DIR + # directories change too much; detect only existence. + return False, STAMP_DIR else: # a "unique identifier" stamp for a regular file - return str((st.st_mtime, st.st_size, st.st_ino, - st.st_mode, st.st_uid, st.st_gid)) + return (stat.S_ISLNK(st.st_mode), + '-'.join(str(s) for s in + ('%.6f' % st.st_mtime, st.st_size, st.st_ino, + st.st_mode, st.st_uid, st.st_gid))) + + def read_stamp(self): + is_link, pre = self._read_stamp_st(os.lstat) + if is_link: + # if we're a symlink, we actually care about the link object + # itself, *and* the target of the link. If either changes, + # we're considered dirty. + # + # On the other hand, detect_override() doesn't care about the + # target of the link, only the link itself. + _, post = self._read_stamp_st(os.stat) + return pre + '+' + post + else: + return pre def nicename(self): return relpath(os.path.join(vars.BASE, self.name), vars.STARTDIR) diff --git a/t/101-atime/tick b/t/101-atime/tick index 787574a..269165e 100755 --- a/t/101-atime/tick +++ b/t/101-atime/tick @@ -1,6 +1,7 @@ #!/usr/bin/env python2 import time - -t1 = int(time.time()) -while int(time.time()) == t1: - pass +t2 = int(time.time()) + 1.0 +while 1: + t = time.time() + if t >= t2: break + time.sleep(t2 - t + 0.01) diff --git a/t/360-symlinks/.gitignore b/t/360-symlinks/.gitignore index 063a5c1..3579e48 100644 --- a/t/360-symlinks/.gitignore +++ b/t/360-symlinks/.gitignore @@ -1,4 +1,7 @@ *.ran *.extra +*.final a b +*.[123] +dir diff --git a/t/360-symlinks/a.do b/t/360-symlinks/a.do index b7bfe87..a16ed7f 100644 --- a/t/360-symlinks/a.do +++ b/t/360-symlinks/a.do @@ -1,4 +1,7 @@ -echo x >>a.ran -rm -f $2.extra -echo foo >$2.extra -ln -s $2.extra $3 +printf x >>a.ran +rm -f dir/$2.1 $2.2 $2.3 $2.final +echo foo >$2.final +ln -s $2.final $2.3 +ln -s $PWD/$2.3 $2.2 +ln -s ../$2.2 dir/$2.1 +ln -s dir/$2.1 $3 diff --git a/t/360-symlinks/all.do b/t/360-symlinks/all.do index 13ca7a3..6d6c455 100644 --- a/t/360-symlinks/all.do +++ b/t/360-symlinks/all.do @@ -1,34 +1,73 @@ -rm -f a a.extra b b.ran -d0="" +rm -f a a.ran a.final b b.ran *.[123] dir/*.[123] +mkdir -p dir + +reads() { + aold=$aval + bold=$bval + read aval >a.final ../flush-cache -touch a.extra redo-ifchange b -d4=$(cat b.ran) -[ "$d3" = "$d4" ] || exit 14 +reads +[ "$aold" != "$aval" ] || exit 14 +[ "$bold" != "$bval" ] || exit 114 + +# We should also notice if a.final is removed. +# Now a is a "dangling" symlink. +rm -f a.final +../flush-cache +redo-ifchange b +reads +[ "$aold" != "$aval" ] || exit 15 +[ "$bold" != "$bval" ] || exit 115 + +# If the symlink becomes no-longer-dangling, that should be dirty too. +echo "splash" >a.final +../flush-cache +redo-ifchange b +reads +[ "$aold" != "$aval" ] || exit 16 +[ "$bold" != "$bval" ] || exit 116 + +# We ought to know the difference between a, the symlink, and its target. +# If a is replaced with a.final directly, that's a change. +rm -f a +mv a.final a +../flush-cache +redo-ifchange b >/dev/null 2>&1 # hide "you changed it" message +reads +[ "$aold" = "$aval" ] || exit 17 # manual override prevented rebuild +[ "$bold" != "$bval" ] || exit 117 diff --git a/t/360-symlinks/b.do b/t/360-symlinks/b.do index b5a3314..5628919 100644 --- a/t/360-symlinks/b.do +++ b/t/360-symlinks/b.do @@ -1,3 +1,3 @@ -echo x >>b.ran +printf x >>b.ran redo-ifchange a -cat a >$3 +cat a >$3 || : diff --git a/t/360-symlinks/clean.do b/t/360-symlinks/clean.do index a819450..f04d1f9 100644 --- a/t/360-symlinks/clean.do +++ b/t/360-symlinks/clean.do @@ -1 +1,2 @@ -rm -f *~ .*~ a b *.extra *.ran +rm -f *~ .*~ a b *.extra *.final *.ran dir/*.[123] *.[123] +rm -rf dir