Further improve handling of symlink targets/deps.

In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to avoid false positives in the "manual override" detector: a .do file that generates $3 as a symlink would trigger manual override if the *target* of that symlink ever changed, which is incorrect. Unfortunately using os.lstat() leads to a different problem: if X depends on Y and Y is a symlink to Z, then X would not be rebuilt when Z changes, which is clearly wrong. The fix is twofold: 1. read_stamp() should change on changes to both the link itself, *and* the target of the link. 2. We shouldn't mark a target as overridden under so many situations. We'll use *only* the primary mtime of the os.lstat(), not all the other bits in the stamp. Step 2 fixes a few other false positives also. For example, if you 'cp -a' a whole tree to another location, the st_ino of all the targets will change, which would trigger a mass of "manual override" warnings. Although a change in inode is sufficient to count an input as having changed (just to be extra safe), it should *not* be considered a manual override. Now we can distinguish between the two. Because the stamp format has changed, update the SCHEMA_VER field. I should have done this every other time I changed the stamp format, but I forgot. Sorry. That leads to spurious "manually modified" warnings after upgrading redo.
2018-11-21 07:19:20 -05:00 · 2018-11-21 07:19:20 -05:00 · 672b748394
commit 672b748394
parent 69890d0c57
9 changed files with 129 additions and 44 deletions
--- a/builder.py
+++ b/builder.py
@ -147,7 +147,7 @@ class BuildJob:
        newstamp = sf.read_stamp()
        if (sf.is_generated and
            newstamp != state.STAMP_MISSING and 
-            (sf.stamp != newstamp or sf.is_override)):
+            (sf.is_override or state.detect_override(sf.stamp, newstamp))):
                state.warn_override(_nice(t))
                if not sf.is_override:
                    warn('%s - old: %r\n' % (_nice(t), sf.stamp))
@ -345,7 +345,8 @@ class BuildJob:
            sf.is_override = False
            if sf.is_checked() or sf.is_changed():
                # it got checked during the run; someone ran redo-stamp.
-                # update_stamp would call set_changed(); we don't want that
+                # update_stamp would call set_changed(); we don't want that,
                # so only use read_stamp.
                sf.stamp = sf.read_stamp()
            else:
                sf.csum = None
--- a/redo-ifchange.py
+++ b/redo-ifchange.py
@ -1,5 +1,5 @@
 #!/usr/bin/env python2
-import sys, os, traceback
+import os, sys, traceback
 import vars_init
 vars_init.init(sys.argv[1:])
--- a/state.py
+++ b/state.py
@ -14,7 +14,7 @@ else:
    cmdline[0] = os.path.splitext(os.path.basename(cmdline[0]))[0]
    setproctitle(" ".join(cmdline))
-SCHEMA_VER=1
+SCHEMA_VER=2
 TIMEOUT=60
 ALWAYS='//ALWAYS'   # an invalid filename that is always marked as dirty
@ -60,10 +60,12 @@ def db():
            row = None
        ver = row and row[0] or None
        if ver != SCHEMA_VER:
-            err("state database: discarding v%s (wanted v%s)\n"
+            # Don't use err() here because this might happen before
-                % (ver, SCHEMA_VER))
+            # redo-log spawns.
-            must_create = True
+            sys.stderr.write('redo: %s: found v%s (expected v%s)\n'
-            _db = None
+                % (dbfile, ver, SCHEMA_VER))
            sys.stderr.write('redo: manually delete .redo dir to start over.\n')
            sys.exit(1)
    if must_create:
        unlink(dbfile)
        _db = _connect(dbfile)
@ -180,6 +182,24 @@ def target_relpath(t):
    return relpath(t, target_dir)
 def detect_override(stamp1, stamp2):
    """Determine if two stamps differ in a way that means manual override.
    When two stamps differ at all, that means the source is dirty and so we
    need to rebuild.  If they differ in mtime or size, then someone has surely
    edited the file, and we don't want to trample their changes.
    But if the only difference is something else (like ownership, st_mode,
    etc) then that might be a false positive; it's annoying to mark as
    overridden in that case, so we return False.  (It's still dirty though!)
    """
    if stamp1 == stamp2:
        return False
    crit1 = stamp1.split('-', 2)[0:2]
    crit2 = stamp2.split('-', 2)[0:2]
    return crit1 != crit2
 def warn_override(name):
    warn('%s - you modified it; skipping\n' % name)
@ -321,17 +341,34 @@ class File(object):
               "    (target, mode, source, delete_me) values (?,?,?,?)",
               [self.id, mode, src.id, False])
-    def read_stamp(self):
+    def _read_stamp_st(self, statfunc):
        try:
-            st = os.lstat(os.path.join(vars.BASE, self.name))
+            st = statfunc(os.path.join(vars.BASE, self.name))
        except OSError:
-            return STAMP_MISSING
+            return False, STAMP_MISSING
        if stat.S_ISDIR(st.st_mode):
-            return STAMP_DIR
+            # directories change too much; detect only existence.
            return False, STAMP_DIR
        else:
            # a "unique identifier" stamp for a regular file
-            return str((st.st_mtime, st.st_size, st.st_ino,
+            return (stat.S_ISLNK(st.st_mode),
-                        st.st_mode, st.st_uid, st.st_gid))
+                '-'.join(str(s) for s in
                         ('%.6f' % st.st_mtime, st.st_size, st.st_ino,
                          st.st_mode, st.st_uid, st.st_gid)))
    def read_stamp(self):
        is_link, pre = self._read_stamp_st(os.lstat)
        if is_link:
            # if we're a symlink, we actually care about the link object
            # itself, *and* the target of the link.  If either changes,
            # we're considered dirty.
            #
            # On the other hand, detect_override() doesn't care about the
            # target of the link, only the link itself.
            _, post = self._read_stamp_st(os.stat)
            return pre + '+' + post
        else:
            return pre
    def nicename(self):
        return relpath(os.path.join(vars.BASE, self.name), vars.STARTDIR)
--- a/t/101-atime/tick
+++ b/t/101-atime/tick
@ -1,6 +1,7 @@
 #!/usr/bin/env python2
 import time
-
+t2 = int(time.time()) + 1.0
-t1 = int(time.time())
+while 1:
-while int(time.time()) == t1:
+    t = time.time()
-    pass
+    if t >= t2: break
    time.sleep(t2 - t + 0.01)
--- a/t/360-symlinks/.gitignore
+++ b/t/360-symlinks/.gitignore
@ -1,4 +1,7 @@
 *.ran
 *.extra
 *.final
 a
 b
 *.[123]
 dir
--- a/t/360-symlinks/a.do
+++ b/t/360-symlinks/a.do
@ -1,4 +1,7 @@
-echo x >>a.ran
+printf x >>a.ran
-rm -f $2.extra
+rm -f dir/$2.1 $2.2 $2.3 $2.final
-echo foo >$2.extra
+echo foo >$2.final
-ln -s $2.extra $3
+ln -s $2.final $2.3
 ln -s $PWD/$2.3 $2.2
 ln -s ../$2.2 dir/$2.1
 ln -s dir/$2.1 $3
--- a/t/360-symlinks/all.do
+++ b/t/360-symlinks/all.do
@ -1,34 +1,73 @@
-rm -f a a.extra b b.ran
+rm -f a a.ran a.final b b.ran *.[123] dir/*.[123]
-d0=""
+mkdir -p dir
 reads() {
 	aold=$aval
 	bold=$bval
 	read aval <a.ran || :
 	read bval <b.ran || :
 }
 # Basic setup should build a and b
 aval=
 bval=
 redo a
 redo-ifchange b
-d1=$(cat b.ran)
+reads
-[ "$d0" != "$d1" ] || exit 11
+[ "$aold" != "$aval" ] || exit  11
 [ "$bold" != "$bval" ] || exit 111
 # b only rebuilds if a changes
 ../flush-cache
 redo-ifchange b
-d2=$(cat b.ran)
+reads
-[ "$d1" = "$d2" ] || exit 12
+[ "$aold" = "$aval" ] || exit  12
 [ "$bold" = "$bval" ] || exit 112
 . ../skip-if-minimal-do.sh
-# forcibly changing a should rebuild b.
+# forcibly building a should trigger rebuild of b, which depends on it.
-# a is already symlink to a.extra, but redo shouldn't care about the
+# Previous versions of redo would be upset that a.final had changed.
 # target of symlinks, so it shouldn't freak out that a.extra has changed.
 # Anyway, b should still rebuild because a was rebuilt.
 ../flush-cache
 redo a
 redo-ifchange b
-d3=$(cat b.ran)
+reads
-[ "$d2" != "$d3" ] || exit 13
+[ "$aold" != "$aval" ] || exit  13
 [ "$bold" != "$bval" ] || exit 113
-# Explicitly check that changing a's symlink target (a.extra) does *not*
+# a.final is the target of the a symlink.  We should notice when it changes,
-# trigger a rebuild of b, because b depends on the stamp of the symlink,
+# even if a was not rebuilt.  Although it does get rebuilt, because a's
-# not what the symlink points to.  In redo, you declare dependencies on
+# stamp is now different from the database.
-# specific filenames, not the things they happen to refer to.
+echo xx >>a.final
 ../flush-cache
 touch a.extra
 redo-ifchange b
-d4=$(cat b.ran)
+reads
-[ "$d3" = "$d4" ] || exit 14
+[ "$aold" != "$aval" ] || exit  14
 [ "$bold" != "$bval" ] || exit 114
 # We should also notice if a.final is removed.
 # Now a is a "dangling" symlink.
 rm -f a.final
 ../flush-cache
 redo-ifchange b
 reads
 [ "$aold" != "$aval" ] || exit  15
 [ "$bold" != "$bval" ] || exit 115
 # If the symlink becomes no-longer-dangling, that should be dirty too.
 echo "splash" >a.final
 ../flush-cache
 redo-ifchange b
 reads
 [ "$aold" != "$aval" ] || exit  16
 [ "$bold" != "$bval" ] || exit 116
 # We ought to know the difference between a, the symlink, and its target.
 # If a is replaced with a.final directly, that's a change.
 rm -f a
 mv a.final a
 ../flush-cache
 redo-ifchange b >/dev/null 2>&1  # hide "you changed it" message
 reads
 [ "$aold"  = "$aval" ] || exit  17  # manual override prevented rebuild
 [ "$bold" != "$bval" ] || exit 117
--- a/t/360-symlinks/b.do
+++ b/t/360-symlinks/b.do
@ -1,3 +1,3 @@
-echo x >>b.ran
+printf x >>b.ran
 redo-ifchange a
-cat a >$3
+cat a >$3 || :
--- a/t/360-symlinks/clean.do
+++ b/t/360-symlinks/clean.do
@ -1 +1,2 @@
-rm -f *~ .*~ a b *.extra *.ran
+rm -f *~ .*~ a b *.extra *.final *.ran dir/*.[123] *.[123]
 rm -rf dir
`@ -1 +1,2 @@`
	`rm -f ~ .~ a b .extra .ran`	`rm -f ~ .~ a b .extra .final .ran dir/.[123] *.[123]`
		`rm -rf dir`