2018-12-14 08:38:53 +00:00
|
|
|
"""Code for manipulating redo's state database."""
|
2018-12-02 23:15:37 -05:00
|
|
|
import sys, os, errno, stat, fcntl, sqlite3
|
2018-12-05 02:34:36 -05:00
|
|
|
from . import cycles, env
|
2019-01-18 00:06:18 +00:00
|
|
|
from .helpers import unlink, close_on_exec
|
2018-12-05 02:34:36 -05:00
|
|
|
from .logs import warn, debug2, debug3
|
2010-11-21 02:08:05 -08:00
|
|
|
|
2018-12-02 23:15:37 -05:00
|
|
|
# Schema version written to, and expected in, the Schema table; db() exits
# with an error if an existing database reports a different value.
SCHEMA_VER = 2
# Timeout passed to sqlite3.connect() in _connect().
TIMEOUT = 60

ALWAYS = '//ALWAYS' # an invalid filename that is always marked as dirty
STAMP_DIR = 'dir' # the stamp of a directory; mtime is unhelpful
STAMP_MISSING = '0' # the stamp of a nonexistent file

LOG_LOCK_MAGIC = 0x10000000 # fid offset for "log locks"
|
redo-log: prioritize the "foreground" process.
When running a parallel build, redo-log -f (which is auto-started by
redo) tries to traverse through the logs depth first, in the order
parent processes started subprocesses. This works pretty well, but if
its dependencies are locked, a process might have to give up its
jobserver token while other stuff builds its dependencies. After the
dependency finishes, the parent might not be able to get a token for
quite some time, and the logs will appear to stop.
To prevent this from happening, we can instantiate up to one "cheater"
token, only in the foreground process (the one locked by redo-log -f),
which will allow it to continue running, albeit a bit slowly (since it
only has one token out of possibly many). When the process finishes,
we then destroy the fake token. It gets a little complicated; see
explanation at the top of jwack.py.
2018-11-17 04:32:09 -05:00
|
|
|
|
2010-12-10 22:42:33 -08:00
|
|
|
|
2010-12-09 04:58:05 -08:00
|
|
|
def _connect(dbfile):
    """Open the sqlite3 database at dbfile and apply redo's settings.

    The connection uses TIMEOUT as its sqlite3 timeout, turns off
    synchronous writes, selects the journal mode (see below), and returns
    text columns through str.
    """
    conn = sqlite3.connect(dbfile, timeout=TIMEOUT)
    conn.execute("pragma synchronous = off")

    # Some old/broken versions of pysqlite on MacOS work badly with journal
    # mode PERSIST.  But WAL fails on Windows WSL due to WSL's totally broken
    # locking.  On WSL, at least PERSIST works in single-threaded mode, so
    # if we're careful we can use it, more or less.
    if env.v.LOCKS_BROKEN:
        jmode = 'PERSIST'
    else:
        jmode = 'WAL'
    conn.execute("pragma journal_mode = %s" % (jmode,))

    conn.text_factory = str
    return conn
|
|
|
|
|
|
|
|
|
|
|
2018-11-23 19:35:42 -05:00
|
|
|
# We need to keep a process-wide fd open for all access to the lock file.
# Because POSIX lock files are insane, if you close *one* fd pointing
# at a given inode, it will immediately release *all* locks on that inode from
# your pid, even if those locks are on a different fd.  This is literally
# never what you want.  To avoid the problem, always use just a single fd.
# (db() opens the fd and stores it here.)
_lockfile = None


# The process-wide sqlite3 connection; created on the first call to db()
# and returned unchanged by every later call.
_db = None
|
|
|
|
|
def db():
    """Initialize the state database and return its object.

    The first call creates the .redo directory under env.v.BASE if needed,
    opens the process-wide lock file, and connects to db.sqlite3.  A
    missing database is created with the current schema; an existing one
    must report SCHEMA_VER, otherwise a message is written to stderr and the
    process exits with status 1.  If env.v.RUNID is not set yet, a new run
    id is allocated and exported as REDO_RUNID.  Later calls return the
    cached connection.

    Raises:
        OSError: if the .redo directory cannot be created for any reason
            other than already existing.
    """
    global _db, _lockfile
    if _db:
        return _db

    dbdir = '%s/.redo' % env.v.BASE
    dbfile = '%s/db.sqlite3' % dbdir
    try:
        os.mkdir(dbdir)
    except OSError as e:
        # if it exists, that's okay; anything else is a real error.
        if e.errno != errno.EEXIST:
            raise

    _lockfile = os.open(os.path.join(env.v.BASE, '.redo/locks'),
                        os.O_RDWR | os.O_CREAT, 0o666)
    close_on_exec(_lockfile, True)
    if env.is_toplevel and detect_broken_locks():
        env.mark_locks_broken()

    must_create = not os.path.exists(dbfile)
    if not must_create:
        _db = _connect(dbfile)
        try:
            row = _db.cursor().execute("select version from Schema").fetchone()
        except sqlite3.OperationalError:
            # No usable Schema table: treat it as an unknown version.
            row = None
        ver = row[0] if row else None
        if ver != SCHEMA_VER:
            # Don't use err() here because this might happen before
            # redo-log spawns.
            sys.stderr.write(
                'redo: %s: found v%s (expected v%s)\n'
                % (dbfile, ver, SCHEMA_VER))
            sys.stderr.write('redo: manually delete .redo dir to start over.\n')
            sys.exit(1)
    else:
        unlink(dbfile)
        _db = _connect(dbfile)
        _db.execute("create table Schema "
                    " (version int)")
        _db.execute("create table Runid "
                    " (id integer primary key autoincrement)")
        _db.execute("create table Files "
                    " (name not null primary key, "
                    " is_generated int, "
                    " is_override int, "
                    " checked_runid int, "
                    " changed_runid int, "
                    " failed_runid int, "
                    " stamp, "
                    " csum)")
        _db.execute("create table Deps "
                    " (target int, "
                    " source int, "
                    " mode not null, "
                    " delete_me int, "
                    " primary key (target,source))")
        _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER])
        # eat the '0' runid and File id.
        # Because of the cheesy way t/flush-cache is implemented, leave a
        # lot of runids available before the "first" one so that we
        # can adjust cached values to be before the first value.
        _db.execute("insert into Runid values (1000000000)")
        _db.execute("insert into Files (name) values (?)", [ALWAYS])

    if not env.v.RUNID:
        _db.execute("insert into Runid values "
                    " ((select max(id)+1 from Runid))")
        env.v.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0]
        os.environ['REDO_RUNID'] = str(env.v.RUNID)

    _db.commit()
    return _db
|
2018-12-02 23:15:37 -05:00
|
|
|
|
2010-11-21 02:08:05 -08:00
|
|
|
|
2018-12-05 01:07:16 -05:00
|
|
|
def init(targets):
    """Initialize env for the given targets, then open the state database.

    In the toplevel process, locks are also probed with
    detect_broken_locks(), and env is told when they turn out to be broken.
    """
    env.init(targets)
    db()
    if not env.is_toplevel:
        return
    if detect_broken_locks():
        env.mark_locks_broken()
|
2010-11-19 03:03:05 -08:00
|
|
|
|
|
|
|
|
|
2010-12-09 02:44:33 -08:00
|
|
|
# Count of statements issued through _write() since the last commit() or
# rollback().
_wrote = 0
def _write(q, l):
    """Run the modifying statement q with parameters l on the database.

    Each call bumps the pending-write counter that commit(), rollback() and
    is_flushed() consult.  Nothing happens once _insane has been set.
    """
    global _wrote
    if _insane:
        return
    _wrote = _wrote + 1
    db().execute(q, l)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def commit():
    """Commit pending writes, if there are any, and reset the counter.

    Does nothing once _insane has been set or when nothing was written.
    """
    global _wrote
    if _insane or not _wrote:
        return
    db().commit()
    _wrote = 0
|
|
|
|
|
|
|
|
|
|
|
2018-10-06 04:36:24 -04:00
|
|
|
def rollback():
    """Roll back pending writes, if there are any, and reset the counter.

    Does nothing once _insane has been set or when nothing was written.
    """
    global _wrote
    if _insane or not _wrote:
        return
    db().rollback()
    _wrote = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_flushed():
    """Return True when no writes are pending a commit or rollback."""
    return _wrote == 0
|
|
|
|
|
|
|
|
|
|
|
2010-11-22 03:34:37 -08:00
|
|
|
# Set to True by check_sane() once the .redo directory is found missing;
# _write(), commit() and rollback() become no-ops from then on.
_insane = None
def check_sane():
    """Return True if the .redo directory under env.v.BASE still exists.

    A failed check is remembered: once the directory has been seen missing,
    every later call returns False without looking again.
    """
    global _insane
    if _insane:
        return False
    _insane = not os.path.exists('%s/.redo' % env.v.BASE)
    return not _insane
|
|
|
|
|
|
|
|
|
|
|
2018-12-17 15:58:06 +00:00
|
|
|
def _realdirpath(t):
|
|
|
|
|
"""Like realpath(), but don't follow symlinks for the last element.
|
|
|
|
|
|
|
|
|
|
redo needs this because targets can be symlinks themselves, and we want
|
|
|
|
|
to talk about the symlink, not what it points at. However, all the path
|
|
|
|
|
elements along the way could result in pathname aliases for a *particular*
|
|
|
|
|
target, so we want to resolve it to one unique name.
|
|
|
|
|
"""
|
|
|
|
|
dname, fname = os.path.split(t)
|
|
|
|
|
if dname:
|
|
|
|
|
dname = os.path.realpath(dname)
|
|
|
|
|
return os.path.join(dname, fname)
|
|
|
|
|
|
|
|
|
|
|
2010-11-24 03:45:38 -08:00
|
|
|
_cwd = None
|
2010-11-21 04:57:04 -08:00
|
|
|
def relpath(t, base):
|
2018-12-14 08:38:53 +00:00
|
|
|
"""Given a relative or absolute path t, express it relative to base."""
|
2010-11-24 03:45:38 -08:00
|
|
|
global _cwd
|
|
|
|
|
if not _cwd:
|
|
|
|
|
_cwd = os.getcwd()
|
2018-12-17 15:58:06 +00:00
|
|
|
t = os.path.normpath(_realdirpath(os.path.join(_cwd, t)))
|
|
|
|
|
base = os.path.normpath(_realdirpath(base))
|
2010-11-21 04:57:04 -08:00
|
|
|
tparts = t.split('/')
|
|
|
|
|
bparts = base.split('/')
|
2018-12-02 23:15:37 -05:00
|
|
|
for tp, bp in zip(tparts, bparts):
|
2010-11-21 04:57:04 -08:00
|
|
|
if tp != bp:
|
|
|
|
|
break
|
|
|
|
|
tparts.pop(0)
|
|
|
|
|
bparts.pop(0)
|
|
|
|
|
while bparts:
|
|
|
|
|
tparts.insert(0, '..')
|
|
|
|
|
bparts.pop(0)
|
2019-01-18 00:06:18 +00:00
|
|
|
return '/'.join(tparts)
|
2010-11-21 04:57:04 -08:00
|
|
|
|
|
|
|
|
|
2018-12-17 15:58:06 +00:00
|
|
|
# Return a relative path for t that will work after we do
|
|
|
|
|
# chdir(dirname(env.v.TARGET)).
|
|
|
|
|
#
|
redo-log: add automated tests, and fix some path bugs revealed by them.
When a log for X was saying it wanted to refer to Y, we used a relative
path, but it was sometimes relative to the wrong starting location, so
redo-log couldn't find it later.
Two examples:
- if default.o.do is handling builds for a/b/x.o, and default.o.do
does 'redo a/b/x.h', the log for x.o should refer to ./x.h, not
a/b/x.h.
- if foo.do is handling builds for foo, and it does
"cd a/b && redo x", the log for foo should refer to a/b/x, not just
x.
2018-11-19 17:09:40 -05:00
|
|
|
# This is tricky! STARTDIR+PWD is the directory for the *dofile*, when
|
|
|
|
|
# the dofile was started. However, inside the dofile, someone may have done
|
2018-12-05 01:07:16 -05:00
|
|
|
# a chdir to anywhere else. env.v.TARGET is relative to the dofile path, so
|
redo-log: add automated tests, and fix some path bugs revealed by them.
When a log for X was saying it wanted to refer to Y, we used a relative
path, but it was sometimes relative to the wrong starting location, so
redo-log couldn't find it later.
Two examples:
- if default.o.do is handling builds for a/b/x.o, and default.o.do
does 'redo a/b/x.h', the log for x.o should refer to ./x.h, not
a/b/x.h.
- if foo.do is handling builds for foo, and it does
"cd a/b && redo x", the log for foo should refer to a/b/x, not just
x.
2018-11-19 17:09:40 -05:00
|
|
|
# we have to first figure out where the dofile was, then find TARGET relative
|
|
|
|
|
# to that, then find t relative to that.
|
|
|
|
|
#
|
|
|
|
|
# FIXME: find some cleaner terminology for all these different paths.
|
|
|
|
|
def target_relpath(t):
    """Express t relative to the directory containing env.v.TARGET.

    env.v.TARGET is resolved against the dofile directory
    (env.v.STARTDIR joined with env.v.PWD); t is then made relative to the
    directory part of that resolved target path.
    """
    dofile_dir = os.path.abspath(os.path.join(env.v.STARTDIR, env.v.PWD))
    target_path = os.path.join(dofile_dir, env.v.TARGET)
    target_dir = os.path.abspath(os.path.dirname(target_path))
    return relpath(t, target_dir)
|
|
|
|
|
|
|
|
|
|
|
Further improve handling of symlink targets/deps.
In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to
avoid false positives in the "manual override" detector: a .do file
that generates $3 as a symlink would trigger manual override if the
*target* of that symlink ever changed, which is incorrect.
Unfortunately using os.lstat() leads to a different problem: if X
depends on Y and Y is a symlink to Z, then X would not be rebuilt when
Z changes, which is clearly wrong.
The fix is twofold:
1. read_stamp() should change on changes to both the link itself,
*and* the target of the link.
2. We shouldn't mark a target as overridden under so many situations.
We'll use *only* the primary mtime of the os.lstat(), not all the
other bits in the stamp.
Step 2 fixes a few other false positives also. For example, if you
'cp -a' a whole tree to another location, the st_ino of all the targets
will change, which would trigger a mass of "manual override" warnings.
Although a change in inode is sufficient to count an input as having
changed (just to be extra safe), it should *not* be considered a manual
override. Now we can distinguish between the two.
Because the stamp format has changed, update the SCHEMA_VER field. I
should have done this every other time I changed the stamp format, but
I forgot. Sorry. That leads to spurious "manually modified" warnings
after upgrading redo.
2018-11-21 07:19:20 -05:00
|
|
|
def detect_override(stamp1, stamp2):
    """Decide whether a stamp change should count as a manual override.

    Any difference between two stamps makes the file dirty, but only a
    difference in the leading two '-'-separated fields (the mtime and size,
    which change when someone edits the file) is reported as an override.
    Differences confined to later fields (ownership, st_mode, etc) would be
    annoying false positives, so those return False.  Identical stamps
    also return False.
    """
    if stamp1 == stamp2:
        return False
    head1 = stamp1.split('-')[:2]
    head2 = stamp2.split('-')[:2]
    if head1 == head2:
        return False
    return True
|
|
|
|
|
|
|
|
|
|
|
2010-12-19 02:31:40 -08:00
|
|
|
def warn_override(name):
    """Warn that name was modified by hand and is therefore being skipped."""
    message = '%s - you modified it; skipping\n' % name
    warn(message)
|
|
|
|
|
|
|
|
|
|
|
2010-12-19 01:38:38 -08:00
|
|
|
# Columns of the Files table, in the order File._init_from_idname() selects
# them and File._init_from_cols() unpacks them.  File.save() updates every
# column after the first two (rowid and name).
_file_cols = ['rowid', 'name', 'is_generated', 'is_override',
              'checked_runid', 'changed_runid', 'failed_runid',
              'stamp', 'csum']
|
2010-12-07 02:17:22 -08:00
|
|
|
class File(object):
|
2018-12-14 08:38:53 +00:00
|
|
|
"""An object representing a source or target in the redo database."""
|
|
|
|
|
|
2010-12-10 02:58:13 -08:00
|
|
|
# use this mostly to avoid accidentally assigning to typos
|
2010-12-19 01:38:38 -08:00
|
|
|
__slots__ = ['id'] + _file_cols[1:]
|
2010-12-09 02:13:36 -08:00
|
|
|
|
2018-12-02 23:15:37 -05:00
|
|
|
# These warnings are a result of the weird way this class is
|
|
|
|
|
# initialized, which we should fix, and then re-enable warning.
|
|
|
|
|
# pylint: disable=attribute-defined-outside-init
|
2018-12-04 23:34:28 -05:00
|
|
|
def _init_from_idname(self, fid, name, allow_add):
    """Load this object from the Files row matching fid or name.

    fid takes precedence when it is given.  Otherwise name is looked up
    after being made relative to env.v.BASE (ALWAYS is used verbatim).
    When a name has no row yet and allow_add is true, a row is inserted
    first and then loaded.

    Raises:
        Exception: if neither fid nor name is given.
        KeyError: if no row matches and either the name is empty or
            allow_add is false.
    """
    q = ('select %s from Files ' % ', '.join(_file_cols))
    if fid is not None:
        q += 'where rowid=?'
        l = [fid]
    elif name is not None:
        name = ALWAYS if name == ALWAYS else relpath(name, env.v.BASE)
        q += 'where name=?'
        l = [name]
    else:
        raise Exception('name or id must be set')
    d = db()
    row = d.execute(q, l).fetchone()
    if not row:
        if not name:
            raise KeyError('No file with id=%r name=%r' % (fid, name))
        elif not allow_add:
            raise KeyError('No file with name=%r' % (name,))
        try:
            _write('insert into Files (name) values (?)', [name])
        except sqlite3.IntegrityError:
            # some parallel redo probably added it at the same time; no
            # big deal.
            pass
        # Whoever inserted it, the row should be there now.
        row = d.execute(q, l).fetchone()
        assert row
    return self._init_from_cols(row)
|
|
|
|
|
|
|
|
|
|
def _init_from_cols(self, cols):
    """Populate this object from cols, a row in _file_cols order.

    The ALWAYS pseudo-file is forced to look changed in the current run:
    its changed_runid is raised to env.v.RUNID whenever it is lower or
    unset.
    """
    (self.id, self.name, self.is_generated, self.is_override,
     self.checked_runid, self.changed_runid, self.failed_runid,
     self.stamp, self.csum) = cols
    # changed_runid is None for a row that was never marked changed (the
    # ALWAYS row is inserted with only its name).  Treat that as 0 rather
    # than comparing None with an int, which only Python 2 orders.
    if self.name == ALWAYS and (self.changed_runid or 0) < env.v.RUNID:
        self.changed_runid = env.v.RUNID
|
2018-12-02 23:15:37 -05:00
|
|
|
|
2018-12-04 23:34:28 -05:00
|
|
|
def __init__(self, fid=None, name=None, cols=None, allow_add=True):
    """Build a File from a row (cols), or by looking up fid or name.

    When cols is given it is used directly and no lookup is made.
    Otherwise the lookup goes through _init_from_idname(), which may add
    a new row for name when allow_add is true.
    """
    if not cols:
        self._init_from_idname(fid, name, allow_add=allow_add)
        return
    self._init_from_cols(cols)
|
2010-12-11 02:17:51 -08:00
|
|
|
|
2015-05-06 17:56:14 -04:00
|
|
|
def __repr__(self):
    """Return 'File(...)' wrapping the repr of this file's nicename()."""
    shown = self.nicename()
    return "File(%r)" % (shown,)
|
|
|
|
|
|
2010-12-11 02:17:51 -08:00
|
|
|
def refresh(self):
    """Reload every column of this object from its row, looked up by id.

    The row is never created here; a missing row raises KeyError.
    """
    self._init_from_idname(fid=self.id, name=None, allow_add=False)
|
2010-12-07 02:17:22 -08:00
|
|
|
|
|
|
|
|
def save(self):
    """Write this object's attributes back to its Files row.

    Every column in _file_cols after rowid and name is updated; the row
    is selected by self.id.
    """
    saved_cols = _file_cols[2:]
    assignments = ', '.join(['%s=?' % colname for colname in saved_cols])
    values = [getattr(self, colname) for colname in saved_cols]
    values.append(self.id)
    _write('update Files set '
           ' %s '
           ' where rowid=?' % assignments,
           values)
|
2010-12-07 02:17:22 -08:00
|
|
|
|
|
|
|
|
def set_checked(self):
    """Record the current run (env.v.RUNID) as this file's checked_runid."""
    current_run = env.v.RUNID
    self.checked_runid = current_run
|
2010-12-10 20:53:31 -08:00
|
|
|
|
2010-12-19 03:39:37 -08:00
|
|
|
def set_checked_save(self):
    """Mark this file as checked in the current run, then save it."""
    self.set_checked()
    self.save()
|
|
|
|
|
|
2010-12-07 02:17:22 -08:00
|
|
|
def set_changed(self):
    """Mark this file as changed in the current run.

    Logs the file at debug level 2, sets changed_runid to env.v.RUNID,
    and clears both the failure marker and the override flag.
    """
    debug2('BUILT: %r (%r)\n' % (self.name, self.stamp))
    self.is_override = False
    self.failed_runid = None
    self.changed_runid = env.v.RUNID
|
2010-12-07 02:17:22 -08:00
|
|
|
|
2010-12-10 20:53:31 -08:00
|
|
|
def set_failed(self):
    """Mark this object as having failed during the current run.

    The stamp is refreshed first, then failed_runid is set to
    env.v.RUNID, and finally is_generated is recomputed from whether the
    file exists.
    """
    debug2('FAILED: %r\n' % self.name)
    self.update_stamp()
    # Must come after update_stamp(): a stamp change calls set_changed(),
    # which resets failed_runid to None.
    self.failed_runid = env.v.RUNID
    # If the file still exists after the failure, it counts as generated.
    # If it does *not* exist, fall back to treating it as a source: an
    # immediate rebuild attempt will turn it back into a target, and if
    # someone creates the file by hand before then, no "manual override"
    # warning is needed.
    self.is_generated = (self.stamp != STAMP_MISSING)
|
2010-12-10 20:53:31 -08:00
|
|
|
|
2010-12-07 02:17:22 -08:00
|
|
|
def set_static(self):
    """Mark this object as a plain, existing, non-generated file.

    Refreshes the stamp with must_exist=True, so update_stamp() raises
    if the file is missing, then clears the failed, override and
    generated markers.
    """
    self.update_stamp(must_exist=True)
    self.is_generated = False
    self.is_override = False
    self.failed_runid = None
|
|
|
|
|
|
|
|
|
|
def set_override(self):
    """Flag this object as overridden.

    Refreshes the stamp, clears the failure marker and sets is_override.
    """
    self.update_stamp()
    self.failed_runid, self.is_override = None, True
|
2010-12-07 02:17:22 -08:00
|
|
|
|
2011-01-17 23:57:20 -08:00
|
|
|
def update_stamp(self, must_exist=False):
    """Re-read the on-disk stamp and record it if it differs.

    Args:
        must_exist: if true, raise when the file is missing.

    Raises:
        Exception: if must_exist is true and the freshly read stamp is
            STAMP_MISSING.

    When the stamp differs from the stored one, it is logged, stored in
    self.stamp, and set_changed() is called.
    """
    fresh = self.read_stamp()
    if must_exist and fresh == STAMP_MISSING:
        raise Exception("%r does not exist" % self.name)
    if fresh == self.stamp:
        # nothing changed; leave all state untouched
        return
    debug2("STAMP: %s: %r -> %r\n" % (self.name, self.stamp, fresh))
    self.stamp = fresh
    self.set_changed()
|
|
|
|
|
|
2018-12-02 16:53:05 -05:00
|
|
|
def is_source(self):
    """Returns true if this object represents a source (not a target)."""
    if self.name.startswith('//'):
        return False  # special name, ignore
    current = self.read_stamp()
    missing = (current == STAMP_MISSING)
    unchanged = (self.stamp == current)
    # is_failed() is deliberately evaluated only when is_generated is
    # true, keeping the short-circuit order of the checks.
    if (self.is_generated
            and (not self.is_failed() or not missing)
            and not self.is_override
            and unchanged):
        # target is as we left it
        return False
    if missing and not (self.is_generated and unchanged):
        # target has gone missing after the last build.
        # It's not usefully a source *or* a target.
        return False
    return True
|
|
|
|
|
|
|
|
|
|
def is_target(self):
    """Returns true if this object represents a target (not a source).

    An object is a target only when it is flagged as generated and
    is_source() reports false; is_source() is not consulted at all for
    objects that are not generated.
    """
    return bool(self.is_generated) and not self.is_source()
|
|
|
|
|
|
2010-12-10 20:53:31 -08:00
|
|
|
def is_checked(self):
    """Truthy if checked_runid is set and is >= the current env.v.RUNID.

    When checked_runid is falsy it is returned as-is rather than being
    converted to a bool.
    """
    runid = self.checked_runid
    if not runid:
        return runid
    return runid >= env.v.RUNID
|
2010-12-10 20:53:31 -08:00
|
|
|
|
2010-12-07 02:17:22 -08:00
|
|
|
def is_changed(self):
    """Truthy if changed_runid is set and is >= the current env.v.RUNID.

    When changed_runid is falsy it is returned as-is rather than being
    converted to a bool.
    """
    runid = self.changed_runid
    if not runid:
        return runid
    return runid >= env.v.RUNID
|
2010-12-07 02:17:22 -08:00
|
|
|
|
2010-12-10 20:53:31 -08:00
|
|
|
def is_failed(self):
    """Truthy if failed_runid is set and is >= the current env.v.RUNID.

    When failed_runid is falsy it is returned as-is rather than being
    converted to a bool.
    """
    runid = self.failed_runid
    if not runid:
        return runid
    return runid >= env.v.RUNID
|
2010-12-07 02:17:22 -08:00
|
|
|
|
|
|
|
|
def deps(self):
    """Generate the objects that this object depends on.

    Yields (mode, File) pairs, one per Deps row whose target is this
    object; mode is asserted to be 'c' or 'm'.  Nothing is yielded for
    objects that are overridden or not generated.
    """
    if self.is_override or not self.is_generated:
        return
    colnames = ', '.join(_file_cols[1:])
    query = ('select Deps.mode, Deps.source, %s '
             ' from Files '
             ' join Deps on Files.rowid = Deps.source '
             ' where target=?' % colnames)
    # All rows are fetched up front, before the first pair is yielded.
    rows = db().execute(query, [self.id]).fetchall()
    for row in rows:
        mode, cols = row[0], row[1:]
        assert mode in ('c', 'm')
        yield mode, File(cols=cols)
|
2010-12-07 02:17:22 -08:00
|
|
|
|
2010-12-11 22:59:55 -08:00
|
|
|
def zap_deps1(self):
    """Flag every dependency of this object for later deletion.

    Done when a new build of this target starts.  The rows are only
    marked (delete_me is set), not removed, so that the old dependencies
    are still known if the build fails.
    """
    debug2('zap-deps1: %r\n' % self.name)
    params = [True, self.id]
    _write('update Deps set delete_me=? where target=?', params)
|
|
|
|
|
|
|
|
|
|
def zap_deps2(self):
    """Remove the dependencies that are still flagged with delete_me.

    A target's dependencies can differ from one build to the next; the
    stale ones (those *not* referenced in the current run) are forgotten
    only once a build has completed successfully.
    """
    debug2('zap-deps2: %r\n' % self.name)
    params = [self.id]
    _write('delete from Deps where target=? and delete_me=1', params)
|
2010-12-07 02:17:22 -08:00
|
|
|
|
|
|
|
|
def add_dep(self, mode, dep):
    """Record that this object depends on the file named `dep`.

    Args:
        mode: dependency mode stored in the Deps row (deps() expects
            'c' or 'm').
        dep: name of the file depended upon; it is looked up through
            File(name=dep).

    The row is inserted (or replaced) with delete_me cleared.  An object
    must not depend on itself.
    """
    source = File(name=dep)
    debug3('add-dep: "%s" < %s "%s"\n' % (self.name, mode, source.name))
    assert self.id != source.id
    row = [self.id, mode, source.id, False]
    _write("insert or replace into Deps "
           " (target, mode, source, delete_me) values (?,?,?,?)",
           row)
|
2010-12-07 02:17:22 -08:00
|
|
|
|
Further improve handling of symlink targets/deps.
In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to
avoid false positives in the "manual override" detector: a .do file
that generates $3 as a symlink would trigger manual override if the
*target* of that symlink ever changed, which is incorrect.
Unfortunately using os.lstat() leads to a different problem: if X
depends on Y and Y is a symlink to Z, then X would not be rebuilt when
Z changes, which is clearly wrong.
The fix is twofold:
1. read_stamp() should change on changes to both the link itself,
*and* the target of the link.
2. We shouldn't mark a target as overridden under so many situations.
We'll use *only* the primary mtime of the os.lstat(), not all the
other bits in the stamp.
Step 2 fixes a few other false positives also. For example, if you
'cp -a' a whole tree to another location, the st_ino of all the targets
will change, which would trigger a mass of "manual override" warnings.
Although a change in inode is sufficient to count an input as having
changed (just to be extra safe), it should *not* be considered a manual
override. Now we can distinguish between the two.
Because the stamp format has changed, update the SCHEMA_VER field. I
should have done this every other time I changed the stamp format, but
I forgot. Sorry. That leads to spurious "manually modified" warnings
after upgrading redo.
2018-11-21 07:19:20 -05:00
|
|
|
def _read_stamp_st(self, statfunc):
    """Compute a stamp for this file using the given stat function.

    Args:
        statfunc: a stat-like callable (read_stamp() passes os.lstat or
            os.stat) applied to the file's path under env.v.BASE.

    Returns:
        An (is_link, stamp) tuple.  stamp is STAMP_MISSING when statfunc
        raises OSError and STAMP_DIR for a directory; is_link is False
        in both of those cases.  Otherwise stamp is a '-'-joined string
        of mtime, size, inode, mode, uid and gid, and is_link says
        whether the stat result describes a symlink.
    """
    path = os.path.join(env.v.BASE, self.name)
    try:
        st = statfunc(path)
    except OSError:
        return False, STAMP_MISSING
    if stat.S_ISDIR(st.st_mode):
        # directories change too much; detect only existence.
        return False, STAMP_DIR
    # a "unique identifier" stamp for a regular file
    fields = ('%.6f' % st.st_mtime, st.st_size, st.st_ino,
              st.st_mode, st.st_uid, st.st_gid)
    return stat.S_ISLNK(st.st_mode), '-'.join(str(f) for f in fields)
|
Further improve handling of symlink targets/deps.
In commit redo-0.11-4-g34669fb, we changed os.stat into os.lstat to
avoid false positives in the "manual override" detector: a .do file
that generates $3 as a symlink would trigger manual override if the
*target* of that symlink ever changed, which is incorrect.
Unfortunately using os.lstat() leads to a different problem: if X
depends on Y and Y is a symlink to Z, then X would not be rebuilt when
Z changes, which is clearly wrong.
The fix is twofold:
1. read_stamp() should change on changes to both the link itself,
*and* the target of the link.
2. We shouldn't mark a target as overridden under so many situations.
We'll use *only* the primary mtime of the os.lstat(), not all the
other bits in the stamp.
Step 2 fixes a few other false positives also. For example, if you
'cp -a' a whole tree to another location, the st_ino of all the targets
will change, which would trigger a mass of "manual override" warnings.
Although a change in inode is sufficient to count an input as having
changed (just to be extra safe), it should *not* be considered a manual
override. Now we can distinguish between the two.
Because the stamp format has changed, update the SCHEMA_VER field. I
should have done this every other time I changed the stamp format, but
I forgot. Sorry. That leads to spurious "manually modified" warnings
after upgrading redo.
2018-11-21 07:19:20 -05:00
|
|
|
|
|
|
|
|
def read_stamp(self):
    """Return the current on-disk stamp for this file.

    For anything that is not a symlink, this is simply the lstat-based
    stamp.  For a symlink it is the link's own stamp and the stamp of
    whatever the link points at, joined with '+', so a change to either
    one makes the file look dirty.  (detect_override(), by contrast,
    only cares about the link itself, not its target.)
    """
    is_link, link_stamp = self._read_stamp_st(os.lstat)
    if not is_link:
        return link_stamp
    _, target_stamp = self._read_stamp_st(os.stat)
    return link_stamp + '+' + target_stamp
|
2010-11-19 03:03:05 -08:00
|
|
|
|
2010-12-11 21:19:15 -08:00
|
|
|
def nicename(self):
    """Return this file's path expressed relative to env.v.STARTDIR."""
    full_path = os.path.join(env.v.BASE, self.name)
    return relpath(full_path, env.v.STARTDIR)
|
2010-12-11 21:19:15 -08:00
|
|
|
|
|
|
|
|
|
2010-12-19 01:38:38 -08:00
|
|
|
def files():
    """Generate a File object for every row of the Files table, by name."""
    query = 'select %s from Files order by name' % ', '.join(_file_cols)
    # All rows are fetched before the first File is yielded.
    for record in db().execute(query).fetchall():
        yield File(cols=record)
|
|
|
|
|
|
2010-11-19 03:03:05 -08:00
|
|
|
|
redo-log: capture and linearize the output of redo builds.
redo now saves the stderr from every .do script, for every target, into
a file in the .redo directory. That means you can look up the logs
from the most recent build of any target using the new redo-log
command, for example:
redo-log -r all
The default is to show logs non-recursively, that is, it'll show when a
target does redo-ifchange on another target, but it won't recurse into
the logs for the latter target. With -r (recursive), it does. With -u
(unchanged), it does even if redo-ifchange discovered that the target
was already up-to-date; in that case, it prints the logs of the *most
recent* time the target was generated.
With --no-details, redo-log will show only the 'redo' lines, not the
other log messages. For very noisy build systems (like recursing into
a 'make' instance) this can be helpful to get an overview of what
happened, without all the cruft.
You can use the -f (follow) option like tail -f, to follow a build
that's currently in progress until it finishes. redo itself spins up a
copy of redo-log -r -f while it runs, so you can see what's going on.
Still broken in this version:
- No man page or new tests yet.
- ANSI colors don't yet work (unless you use --raw-logs, which gives
the old-style behaviour).
- You can't redirect the output of a sub-redo to a file or a
pipe right now, because redo-log is eating it.
- The regex for matching 'redo' lines in the log is very gross.
Instead, we should put the raw log files in a more machine-parseable
format, and redo-log should turn that into human-readable format.
- redo-log tries to "linearize" the logs, which makes them
comprehensible even for a large parallel build. It recursively shows
log messages for each target in depth-first tree order (by tracing
into a new target every time it sees a 'redo' line). This works
really well, but in some specific cases, the "topmost" redo instance
can get stuck waiting for a jwack token, which makes it look like the
whole build has stalled, when really redo-log is just waiting a long
time for a particular subprocess to be able to continue. We'll need to
add a specific workaround for that.
2018-11-03 22:09:18 -04:00
|
|
|
def logname(fid):
    """Given the id of a File, return the filename of its build log.

    The log is 'log.<fid>' inside the '.redo' directory under env.v.BASE.
    """
    basename = 'log.%d' % fid
    return os.path.join(env.v.BASE, '.redo', basename)
|
redo-log: capture and linearize the output of redo builds.
redo now saves the stderr from every .do script, for every target, into
a file in the .redo directory. That means you can look up the logs
from the most recent build of any target using the new redo-log
command, for example:
redo-log -r all
The default is to show logs non-recursively, that is, it'll show when a
target does redo-ifchange on another target, but it won't recurse into
the logs for the latter target. With -r (recursive), it does. With -u
(unchanged), it does even if redo-ifchange discovered that the target
was already up-to-date; in that case, it prints the logs of the *most
recent* time the target was generated.
With --no-details, redo-log will show only the 'redo' lines, not the
other log messages. For very noisy build systems (like recursing into
a 'make' instance) this can be helpful to get an overview of what
happened, without all the cruft.
You can use the -f (follow) option like tail -f, to follow a build
that's currently in progress until it finishes. redo itself spins up a
copy of redo-log -r -f while it runs, so you can see what's going on.
Still broken in this version:
- No man page or new tests yet.
- ANSI colors don't yet work (unless you use --raw-logs, which gives
the old-style behaviour).
- You can't redirect the output of a sub-redo to a file or a
pipe right now, because redo-log is eating it.
- The regex for matching 'redo' lines in the log is very gross.
Instead, we should put the raw log files in a more machine-parseable
format, and redo-log should turn that into human-readable format.
- redo-log tries to "linearize" the logs, which makes them
comprehensible even for a large parallel build. It recursively shows
log messages for each target in depth-first tree order (by tracing
into a new target every time it sees a 'redo' line). This works
really well, but in some specific cases, the "topmost" redo instance
can get stuck waiting for a jwack token, which makes it look like the
whole build has stalled, when really redo-log is just waiting a long
time for a particular subprocess to be able to continue. We'll need to
add a specific workaround for that.
2018-11-03 22:09:18 -04:00
|
|
|
|
|
|
|
|
|
2010-12-10 02:58:13 -08:00
|
|
|
# FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python
|
|
|
|
|
# doesn't do the lockdata structure in a portable way, so we have to use
|
|
|
|
|
# fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's
|
|
|
|
|
# ok, but it doesn't have F_GETLK, so we can't report which pid owns the lock.
|
|
|
|
|
# This makes debugging a bit harder. When we someday port to C, we can do that.
|
2010-12-14 02:19:08 -08:00
|
|
|
# Per-process registry keyed by fid: Lock.__init__ sets the entry to 1 (after
# asserting it is not already 1) and Lock.__del__ resets it to 0.
_locks = {}
|
2018-12-02 23:15:37 -05:00
|
|
|
class Lock(object):
    """An object representing a lock on a redo target file.

    Each lock is a one-byte fcntl.lockf() region of the shared _lockfile,
    starting at byte offset `fid`.
    """

    def __init__(self, fid):
        """Initialize a lock, given the target's state.File.id."""
        self.owned = False
        self.fid = fid
        assert _lockfile >= 0
        # Only one Lock object per fid may exist in this process at a time.
        assert _locks.get(fid, 0) == 0
        _locks[fid] = 1

    def __del__(self):
        """Unregister this lock and release it if we still own it."""
        _locks[self.fid] = 0
        if self.owned:
            self.unlock()

    def check(self):
        """Check that this lock is in a sane state.

        Asserts that we do not already own the lock, then runs
        cycles.check() on our fid.
        """
        assert not self.owned
        cycles.check(self.fid)

    def trylock(self):
        """Non-blocking try to acquire our lock; returns true if it worked."""
        self.check()
        assert not self.owned
        try:
            fcntl.lockf(_lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 1, self.fid)
        except IOError as e:
            # 'except ... as' is valid on python 2.6+ and python 3; the
            # older comma form is a syntax error on python 3.
            if e.errno in (errno.EAGAIN, errno.EACCES):
                pass  # someone else has it locked
            else:
                raise
        else:
            self.owned = True
        return self.owned

    def waitlock(self, shared=False):
        """Try to acquire our lock, and wait if it's currently locked.

        If shared=True, acquires a shared lock (which can be shared with
        other shared locks; used by redo-log).  Otherwise, acquires an
        exclusive lock.
        """
        self.check()
        assert not self.owned
        fcntl.lockf(
            _lockfile,
            fcntl.LOCK_SH if shared else fcntl.LOCK_EX,
            1, self.fid)
        self.owned = True

    def unlock(self):
        """Release the lock, which we must currently own.

        Raises:
            Exception: if this object does not own the lock.
        """
        if not self.owned:
            raise Exception("can't unlock %r - we don't own it"
                            % self.fid)
        fcntl.lockf(_lockfile, fcntl.LOCK_UN, 1, self.fid)
        self.owned = False
|
Workaround for completely broken file locking on Windows 10 WSL.
WSL (Windows Subsystem for Linux) provides a Linux-kernel-compatible ABI
for userspace processes, but the current version does not implement
fcntl() locks at all; it just always returns success. See
https://github.com/Microsoft/WSL/issues/1927.
This causes us three kinds of problem:
1. sqlite3 in WAL mode gives "OperationalError: locking protocol".
1b. Other sqlite3 journal modes also don't work when used by
multiple processes.
2. redo parallelism doesn't work, because we can't prevent the same
target from being built several times simultaneously.
3. "redo-log -f" doesn't work, since it can't tell whether the log
file it's tailing is "done" or not.
To fix #1, we switch the sqlite3 journal back to PERSIST instead of
WAL. We originally changed to WAL in commit 5156feae9d to reduce
deadlocks on MacOS. That was never adequately explained, but PERSIST
still acts weird on MacOS, so we'll only switch to PERSIST when we
detect that locking is definitely broken. Sigh.
To (mostly) fix #2, we disable any -j value > 1 when locking is broken.
This prevents basic forms of parallelism, but doesn't stop you from
re-entrantly starting other instances of redo. To fix that properly,
we need to switch to a different locking mechanism entirely, which is
tough in python. flock() locks probably work, for example, but
python's locks lie and just use fcntl locks for those.
To fix #3, we always force --no-log mode when we find that locking is
broken.
2019-01-02 14:18:51 -05:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def detect_broken_locks():
    """Detect Windows WSL's completely broken fcntl() locks.

    Symptom: locking a file always returns success, even if other processes
    also think they have it locked.  See
    https://github.com/Microsoft/WSL/issues/1927 for more details.

    Bug exists at least in WSL "4.4.0-17134-Microsoft #471-Microsoft".

    The test takes lock 0 in this process, forks, and has the child try
    to take the same lock without blocking.  The child exits 0 if it was
    refused, 1 if it got the lock, and 99 if anything unexpected happened.

    Returns true if broken, false otherwise.
    """
    parent_lock = Lock(0)
    # We wait for the lock here, just in case others are doing
    # this test at the same time.
    parent_lock.waitlock(shared=False)
    child_pid = os.fork()
    if child_pid == 0:
        # child; every path through this branch ends in os._exit().
        try:
            # Doesn't actually unlock, since child process doesn't own it
            parent_lock.unlock()
            del parent_lock
            child_lock = Lock(0)
            # parent is holding lock, which should prevent us from getting it.
            if child_lock.trylock():
                # Got the lock?  Yikes, the locking system is broken!
                os._exit(1)
            # Failed to get the lock?  Good, the parent owns it.
            os._exit(0)
        except Exception:  # pylint: disable=broad-except
            import traceback
            traceback.print_exc()
        finally:
            os._exit(99)
    # parent
    _, status = os.waitpid(child_pid, 0)
    exited_cleanly = os.WIFEXITED(status) and not os.WEXITSTATUS(status)
    return not exited_cleanly
|