Further improve handling of symlink targets/deps.

In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to
avoid false positives in the "manual override" detector: a .do file
that generates $3 as a symlink would trigger manual override if the
*target* of that symlink ever changed, which is incorrect.

Unfortunately using os.lstat() leads to a different problem: if X
depends on Y and Y is a symlink to Z, then X would not be rebuilt when
Z changes, which is clearly wrong.

The fix is twofold:

1. read_stamp() should change on changes to both the link itself,
   *and* the target of the link.

2. We shouldn't mark a target as overridden under so many situations.
   We'll use *only* the primary mtime of the os.lstat(), not all the
   other bits in the stamp.

Step 2 fixes a few other false positives also.  For example, if you
'cp -a' a whole tree to another location, the st_ino of all the targets
will change, which would trigger a mass of "manual override" warnings.
Although a change in inode is sufficient to count an input as having
changed (just to be extra safe), it should *not* be considered a manual
override.  Now we can distinguish between the two.

Because the stamp format has changed, update the SCHEMA_VER field.  I
should have done this every other time I changed the stamp format, but
I forgot.  Sorry.  That leads to spurious "manually modified" warnings
after upgrading redo.
This commit is contained in:
Avery Pennarun 2018-11-21 07:19:20 -05:00
commit 672b748394
9 changed files with 129 additions and 44 deletions

View file

@ -147,7 +147,7 @@ class BuildJob:
newstamp = sf.read_stamp() newstamp = sf.read_stamp()
if (sf.is_generated and if (sf.is_generated and
newstamp != state.STAMP_MISSING and newstamp != state.STAMP_MISSING and
(sf.stamp != newstamp or sf.is_override)): (sf.is_override or state.detect_override(sf.stamp, newstamp))):
state.warn_override(_nice(t)) state.warn_override(_nice(t))
if not sf.is_override: if not sf.is_override:
warn('%s - old: %r\n' % (_nice(t), sf.stamp)) warn('%s - old: %r\n' % (_nice(t), sf.stamp))
@ -345,7 +345,8 @@ class BuildJob:
sf.is_override = False sf.is_override = False
if sf.is_checked() or sf.is_changed(): if sf.is_checked() or sf.is_changed():
# it got checked during the run; someone ran redo-stamp. # it got checked during the run; someone ran redo-stamp.
# update_stamp would call set_changed(); we don't want that # update_stamp would call set_changed(); we don't want that,
# so only use read_stamp.
sf.stamp = sf.read_stamp() sf.stamp = sf.read_stamp()
else: else:
sf.csum = None sf.csum = None

View file

@ -1,5 +1,5 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import sys, os, traceback import os, sys, traceback
import vars_init import vars_init
vars_init.init(sys.argv[1:]) vars_init.init(sys.argv[1:])

View file

@ -14,7 +14,7 @@ else:
cmdline[0] = os.path.splitext(os.path.basename(cmdline[0]))[0] cmdline[0] = os.path.splitext(os.path.basename(cmdline[0]))[0]
setproctitle(" ".join(cmdline)) setproctitle(" ".join(cmdline))
SCHEMA_VER=1 SCHEMA_VER=2
TIMEOUT=60 TIMEOUT=60
ALWAYS='//ALWAYS' # an invalid filename that is always marked as dirty ALWAYS='//ALWAYS' # an invalid filename that is always marked as dirty
@ -60,10 +60,12 @@ def db():
row = None row = None
ver = row and row[0] or None ver = row and row[0] or None
if ver != SCHEMA_VER: if ver != SCHEMA_VER:
err("state database: discarding v%s (wanted v%s)\n" # Don't use err() here because this might happen before
% (ver, SCHEMA_VER)) # redo-log spawns.
must_create = True sys.stderr.write('redo: %s: found v%s (expected v%s)\n'
_db = None % (dbfile, ver, SCHEMA_VER))
sys.stderr.write('redo: manually delete .redo dir to start over.\n')
sys.exit(1)
if must_create: if must_create:
unlink(dbfile) unlink(dbfile)
_db = _connect(dbfile) _db = _connect(dbfile)
@ -180,6 +182,24 @@ def target_relpath(t):
return relpath(t, target_dir) return relpath(t, target_dir)
def detect_override(stamp1, stamp2):
"""Determine if two stamps differ in a way that means manual override.
When two stamps differ at all, that means the source is dirty and so we
need to rebuild. If they differ in mtime or size, then someone has surely
edited the file, and we don't want to trample their changes.
But if the only difference is something else (like ownership, st_mode,
etc) then that might be a false positive; it's annoying to mark as
overridden in that case, so we return False. (It's still dirty though!)
"""
if stamp1 == stamp2:
return False
crit1 = stamp1.split('-', 2)[0:2]
crit2 = stamp2.split('-', 2)[0:2]
return crit1 != crit2
def warn_override(name): def warn_override(name):
warn('%s - you modified it; skipping\n' % name) warn('%s - you modified it; skipping\n' % name)
@ -321,17 +341,34 @@ class File(object):
" (target, mode, source, delete_me) values (?,?,?,?)", " (target, mode, source, delete_me) values (?,?,?,?)",
[self.id, mode, src.id, False]) [self.id, mode, src.id, False])
def read_stamp(self): def _read_stamp_st(self, statfunc):
try: try:
st = os.lstat(os.path.join(vars.BASE, self.name)) st = statfunc(os.path.join(vars.BASE, self.name))
except OSError: except OSError:
return STAMP_MISSING return False, STAMP_MISSING
if stat.S_ISDIR(st.st_mode): if stat.S_ISDIR(st.st_mode):
return STAMP_DIR # directories change too much; detect only existence.
return False, STAMP_DIR
else: else:
# a "unique identifier" stamp for a regular file # a "unique identifier" stamp for a regular file
return str((st.st_mtime, st.st_size, st.st_ino, return (stat.S_ISLNK(st.st_mode),
st.st_mode, st.st_uid, st.st_gid)) '-'.join(str(s) for s in
('%.6f' % st.st_mtime, st.st_size, st.st_ino,
st.st_mode, st.st_uid, st.st_gid)))
def read_stamp(self):
is_link, pre = self._read_stamp_st(os.lstat)
if is_link:
# if we're a symlink, we actually care about the link object
# itself, *and* the target of the link. If either changes,
# we're considered dirty.
#
# On the other hand, detect_override() doesn't care about the
# target of the link, only the link itself.
_, post = self._read_stamp_st(os.stat)
return pre + '+' + post
else:
return pre
def nicename(self): def nicename(self):
return relpath(os.path.join(vars.BASE, self.name), vars.STARTDIR) return relpath(os.path.join(vars.BASE, self.name), vars.STARTDIR)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
import time import time
t2 = int(time.time()) + 1.0
t1 = int(time.time()) while 1:
while int(time.time()) == t1: t = time.time()
pass if t >= t2: break
time.sleep(t2 - t + 0.01)

View file

@ -1,4 +1,7 @@
*.ran *.ran
*.extra *.extra
*.final
a a
b b
*.[123]
dir

View file

@ -1,4 +1,7 @@
echo x >>a.ran printf x >>a.ran
rm -f $2.extra rm -f dir/$2.1 $2.2 $2.3 $2.final
echo foo >$2.extra echo foo >$2.final
ln -s $2.extra $3 ln -s $2.final $2.3
ln -s $PWD/$2.3 $2.2
ln -s ../$2.2 dir/$2.1
ln -s dir/$2.1 $3

View file

@ -1,34 +1,73 @@
rm -f a a.extra b b.ran rm -f a a.ran a.final b b.ran *.[123] dir/*.[123]
d0="" mkdir -p dir
reads() {
aold=$aval
bold=$bval
read aval <a.ran || :
read bval <b.ran || :
}
# Basic setup should build a and b
aval=
bval=
redo a redo a
redo-ifchange b redo-ifchange b
d1=$(cat b.ran) reads
[ "$d0" != "$d1" ] || exit 11 [ "$aold" != "$aval" ] || exit 11
[ "$bold" != "$bval" ] || exit 111
# b only rebuilds if a changes # b only rebuilds if a changes
../flush-cache ../flush-cache
redo-ifchange b redo-ifchange b
d2=$(cat b.ran) reads
[ "$d1" = "$d2" ] || exit 12 [ "$aold" = "$aval" ] || exit 12
[ "$bold" = "$bval" ] || exit 112
. ../skip-if-minimal-do.sh . ../skip-if-minimal-do.sh
# forcibly changing a should rebuild b. # forcibly building a should trigger rebuild of b, which depends on it.
# a is already symlink to a.extra, but redo shouldn't care about the # Previous versions of redo would be upset that a.final had changed.
# target of symlinks, so it shouldn't freak out that a.extra has changed.
# Anyway, b should still rebuild because a was rebuilt.
../flush-cache ../flush-cache
redo a redo a
redo-ifchange b redo-ifchange b
d3=$(cat b.ran) reads
[ "$d2" != "$d3" ] || exit 13 [ "$aold" != "$aval" ] || exit 13
[ "$bold" != "$bval" ] || exit 113
# Explicitly check that changing a's symlink target (a.extra) does *not* # a.final is the target of the a symlink. We should notice when it changes,
# trigger a rebuild of b, because b depends on the stamp of the symlink, # even if a was not rebuilt. Although it does get rebuilt, because a's
# not what the symlink points to. In redo, you declare dependencies on # stamp is now different from the database.
# specific filenames, not the things they happen to refer to. echo xx >>a.final
../flush-cache ../flush-cache
touch a.extra
redo-ifchange b redo-ifchange b
d4=$(cat b.ran) reads
[ "$d3" = "$d4" ] || exit 14 [ "$aold" != "$aval" ] || exit 14
[ "$bold" != "$bval" ] || exit 114
# We should also notice if a.final is removed.
# Now a is a "dangling" symlink.
rm -f a.final
../flush-cache
redo-ifchange b
reads
[ "$aold" != "$aval" ] || exit 15
[ "$bold" != "$bval" ] || exit 115
# If the symlink becomes no-longer-dangling, that should be dirty too.
echo "splash" >a.final
../flush-cache
redo-ifchange b
reads
[ "$aold" != "$aval" ] || exit 16
[ "$bold" != "$bval" ] || exit 116
# We ought to know the difference between a, the symlink, and its target.
# If a is replaced with a.final directly, that's a change.
rm -f a
mv a.final a
../flush-cache
redo-ifchange b >/dev/null 2>&1 # hide "you changed it" message
reads
[ "$aold" = "$aval" ] || exit 17 # manual override prevented rebuild
[ "$bold" != "$bval" ] || exit 117

View file

@ -1,3 +1,3 @@
echo x >>b.ran printf x >>b.ran
redo-ifchange a redo-ifchange a
cat a >$3 cat a >$3 || :

View file

@ -1 +1,2 @@
rm -f *~ .*~ a b *.extra *.ran rm -f *~ .*~ a b *.extra *.final *.ran dir/*.[123] *.[123]
rm -rf dir