Further improve handling of symlink targets/deps.

In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to
avoid false positives in the "manual override" detector: a .do file
that generates $3 as a symlink would trigger manual override if the
*target* of that symlink ever changed, which is incorrect.

Unfortunately using os.lstat() leads to a different problem: if X
depends on Y and Y is a symlink to Z, then X would not be rebuilt when
Z changes, which is clearly wrong.

The fix is twofold:

1. read_stamp() should change on changes to both the link itself,
   *and* the target of the link.

2. We shouldn't mark a target as overridden under so many situations.
   We'll use *only* the primary mtime of the os.lstat(), not all the
   other bits in the stamp.

Step 2 fixes a few other false positives also.  For example, if you
'cp -a' a whole tree to another location, the st_ino of all the targets
will change, which would trigger a mass of "manual override" warnings.
Although a change in inode is sufficient to count an input as having
changed (just to be extra safe), it should *not* be considered a manual
override.  Now we can distinguish between the two.

Because the stamp format has changed, update the SCHEMA_VER field.  I
should have done this every other time I changed the stamp format, but
I forgot.  Sorry.  That leads to spurious "manually modified" warnings
after upgrading redo.
This commit is contained in:
Avery Pennarun 2018-11-21 07:19:20 -05:00
commit 672b748394
9 changed files with 129 additions and 44 deletions

View file

@ -147,7 +147,7 @@ class BuildJob:
newstamp = sf.read_stamp()
if (sf.is_generated and
newstamp != state.STAMP_MISSING and
(sf.stamp != newstamp or sf.is_override)):
(sf.is_override or state.detect_override(sf.stamp, newstamp))):
state.warn_override(_nice(t))
if not sf.is_override:
warn('%s - old: %r\n' % (_nice(t), sf.stamp))
@ -345,7 +345,8 @@ class BuildJob:
sf.is_override = False
if sf.is_checked() or sf.is_changed():
# it got checked during the run; someone ran redo-stamp.
# update_stamp would call set_changed(); we don't want that
# update_stamp would call set_changed(); we don't want that,
# so only use read_stamp.
sf.stamp = sf.read_stamp()
else:
sf.csum = None

View file

@ -1,5 +1,5 @@
#!/usr/bin/env python2
import sys, os, traceback
import os, sys, traceback
import vars_init
vars_init.init(sys.argv[1:])

View file

@ -14,7 +14,7 @@ else:
cmdline[0] = os.path.splitext(os.path.basename(cmdline[0]))[0]
setproctitle(" ".join(cmdline))
SCHEMA_VER=1
SCHEMA_VER=2
TIMEOUT=60
ALWAYS='//ALWAYS' # an invalid filename that is always marked as dirty
@ -60,10 +60,12 @@ def db():
row = None
ver = row and row[0] or None
if ver != SCHEMA_VER:
err("state database: discarding v%s (wanted v%s)\n"
% (ver, SCHEMA_VER))
must_create = True
_db = None
# Don't use err() here because this might happen before
# redo-log spawns.
sys.stderr.write('redo: %s: found v%s (expected v%s)\n'
% (dbfile, ver, SCHEMA_VER))
sys.stderr.write('redo: manually delete .redo dir to start over.\n')
sys.exit(1)
if must_create:
unlink(dbfile)
_db = _connect(dbfile)
@ -180,6 +182,24 @@ def target_relpath(t):
return relpath(t, target_dir)
def detect_override(stamp1, stamp2):
"""Determine if two stamps differ in a way that means manual override.
When two stamps differ at all, that means the source is dirty and so we
need to rebuild. If they differ in mtime or size, then someone has surely
edited the file, and we don't want to trample their changes.
But if the only difference is something else (like ownership, st_mode,
etc) then that might be a false positive; it's annoying to mark as
overridden in that case, so we return False. (It's still dirty though!)
"""
if stamp1 == stamp2:
return False
crit1 = stamp1.split('-', 2)[0:2]
crit2 = stamp2.split('-', 2)[0:2]
return crit1 != crit2
def warn_override(name):
warn('%s - you modified it; skipping\n' % name)
@ -321,17 +341,34 @@ class File(object):
" (target, mode, source, delete_me) values (?,?,?,?)",
[self.id, mode, src.id, False])
def read_stamp(self):
def _read_stamp_st(self, statfunc):
try:
st = os.lstat(os.path.join(vars.BASE, self.name))
st = statfunc(os.path.join(vars.BASE, self.name))
except OSError:
return STAMP_MISSING
return False, STAMP_MISSING
if stat.S_ISDIR(st.st_mode):
return STAMP_DIR
# directories change too much; detect only existence.
return False, STAMP_DIR
else:
# a "unique identifier" stamp for a regular file
return str((st.st_mtime, st.st_size, st.st_ino,
st.st_mode, st.st_uid, st.st_gid))
return (stat.S_ISLNK(st.st_mode),
'-'.join(str(s) for s in
('%.6f' % st.st_mtime, st.st_size, st.st_ino,
st.st_mode, st.st_uid, st.st_gid)))
def read_stamp(self):
is_link, pre = self._read_stamp_st(os.lstat)
if is_link:
# if we're a symlink, we actually care about the link object
# itself, *and* the target of the link. If either changes,
# we're considered dirty.
#
# On the other hand, detect_override() doesn't care about the
# target of the link, only the link itself.
_, post = self._read_stamp_st(os.stat)
return pre + '+' + post
else:
return pre
def nicename(self):
return relpath(os.path.join(vars.BASE, self.name), vars.STARTDIR)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python2
import time
t1 = int(time.time())
while int(time.time()) == t1:
pass
t2 = int(time.time()) + 1.0
while 1:
t = time.time()
if t >= t2: break
time.sleep(t2 - t + 0.01)

View file

@ -1,4 +1,7 @@
*.ran
*.extra
*.final
a
b
*.[123]
dir

View file

@ -1,4 +1,7 @@
echo x >>a.ran
rm -f $2.extra
echo foo >$2.extra
ln -s $2.extra $3
printf x >>a.ran
rm -f dir/$2.1 $2.2 $2.3 $2.final
echo foo >$2.final
ln -s $2.final $2.3
ln -s $PWD/$2.3 $2.2
ln -s ../$2.2 dir/$2.1
ln -s dir/$2.1 $3

View file

@ -1,34 +1,73 @@
rm -f a a.extra b b.ran
d0=""
rm -f a a.ran a.final b b.ran *.[123] dir/*.[123]
mkdir -p dir
reads() {
aold=$aval
bold=$bval
read aval <a.ran || :
read bval <b.ran || :
}
# Basic setup should build a and b
aval=
bval=
redo a
redo-ifchange b
d1=$(cat b.ran)
[ "$d0" != "$d1" ] || exit 11
reads
[ "$aold" != "$aval" ] || exit 11
[ "$bold" != "$bval" ] || exit 111
# b only rebuilds if a changes
../flush-cache
redo-ifchange b
d2=$(cat b.ran)
[ "$d1" = "$d2" ] || exit 12
reads
[ "$aold" = "$aval" ] || exit 12
[ "$bold" = "$bval" ] || exit 112
. ../skip-if-minimal-do.sh
# forcibly changing a should rebuild b.
# a is already symlink to a.extra, but redo shouldn't care about the
# target of symlinks, so it shouldn't freak out that a.extra has changed.
# Anyway, b should still rebuild because a was rebuilt.
# forcibly building a should trigger rebuild of b, which depends on it.
# Previous versions of redo would be upset that a.final had changed.
../flush-cache
redo a
redo-ifchange b
d3=$(cat b.ran)
[ "$d2" != "$d3" ] || exit 13
reads
[ "$aold" != "$aval" ] || exit 13
[ "$bold" != "$bval" ] || exit 113
# Explicitly check that changing a's symlink target (a.extra) does *not*
# trigger a rebuild of b, because b depends on the stamp of the symlink,
# not what the symlink points to. In redo, you declare dependencies on
# specific filenames, not the things they happen to refer to.
# a.final is the target of the a symlink. We should notice when it changes,
# even if a was not rebuilt. Although it does get rebuilt, because a's
# stamp is now different from the database.
echo xx >>a.final
../flush-cache
touch a.extra
redo-ifchange b
d4=$(cat b.ran)
[ "$d3" = "$d4" ] || exit 14
reads
[ "$aold" != "$aval" ] || exit 14
[ "$bold" != "$bval" ] || exit 114
# We should also notice if a.final is removed.
# Now a is a "dangling" symlink.
rm -f a.final
../flush-cache
redo-ifchange b
reads
[ "$aold" != "$aval" ] || exit 15
[ "$bold" != "$bval" ] || exit 115
# If the symlink becomes no-longer-dangling, that should be dirty too.
echo "splash" >a.final
../flush-cache
redo-ifchange b
reads
[ "$aold" != "$aval" ] || exit 16
[ "$bold" != "$bval" ] || exit 116
# We ought to know the difference between a, the symlink, and its target.
# If a is replaced with a.final directly, that's a change.
rm -f a
mv a.final a
../flush-cache
redo-ifchange b >/dev/null 2>&1 # hide "you changed it" message
reads
[ "$aold" = "$aval" ] || exit 17 # manual override prevented rebuild
[ "$bold" != "$bval" ] || exit 117

View file

@ -1,3 +1,3 @@
echo x >>b.ran
printf x >>b.ran
redo-ifchange a
cat a >$3
cat a >$3 || :

View file

@ -1 +1,2 @@
rm -f *~ .*~ a b *.extra *.ran
rm -f *~ .*~ a b *.extra *.final *.ran dir/*.[123] *.[123]
rm -rf dir