It's time to start preparing for a version of redo that doesn't work unless we build it first (because it will rely on C modules, and eventually be rewritten in C altogether).

To get rolling, remove the old-style symlinks to the main programs, and rename those programs from redo-*.py to redo/cmd_*.py. We'll also move all library functions into the redo/ dir, which is a more python-style naming convention.

Previously, install.do was generating wrappers for installing in /usr/bin, which extend sys.path and then import+run the right file. This made "installed" redo work quite differently from running redo inside its source tree. Instead, let's always generate the wrappers in bin/, and not make anything executable except those wrappers.

Since we're generating wrappers anyway, let's actually auto-detect the right version of python for the running system; distros can't seem to agree on what to call their python2 binaries (sigh). We'll fill in the right #! shebang lines. Since we're doing that, we can stop using /usr/bin/env, which will a) make things slightly faster, and b) let us use "python -S", which tells python not to load a bunch of extra crap we're not using, thus improving startup times.

Annoyingly, we now have to build redo using minimal/do, then run the tests using bin/redo. To make this less annoying, we add a toplevel ./do script that knows the right steps, and a Makefile (whee!) for people who are used to typing 'make' and 'make test' and 'make clean'.
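For illustration, a generated wrapper in bin/ might end up looking roughly like this (a sketch only: the detected interpreter path, the install libdir, and the cmd_redo.main entry point are assumptions here, not the literal output of install.do):

    #!/usr/bin/python2 -S
    import sys
    sys.path.insert(0, '/usr/lib/redo')   # assumed libdir containing the redo/ package
    from redo import cmd_redo             # hypothetical entry module
    sys.exit(cmd_redo.main())             # hypothetical main() entry point

The "-S" in the shebang is what skips the automatic "import site" at startup, and hard-coding the interpreter path (instead of /usr/bin/env) is what makes that flag usable at all, since a shebang line only reliably passes a single argument to the interpreter.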
import sys, os, errno, stat, fcntl, sqlite3
import vars
from helpers import unlink, close_on_exec, join
from logs import warn, debug2, debug3

# When the module is imported, change the process title.
# We do it here because this module is imported by all the scripts.
try:
    from setproctitle import setproctitle
except ImportError:
    pass
else:
    cmdline = sys.argv[:]
    cmdline[0] = os.path.splitext(os.path.basename(cmdline[0]))[0]
    setproctitle(" ".join(cmdline))

SCHEMA_VER = 2
TIMEOUT = 60

ALWAYS = '//ALWAYS'  # an invalid filename that is always marked as dirty
STAMP_DIR = 'dir'    # the stamp of a directory; mtime is unhelpful
STAMP_MISSING = '0'  # the stamp of a nonexistent file

LOG_LOCK_MAGIC = 0x10000000  # fid offset for "log locks"


class CyclicDependencyError(Exception):
    pass


def _connect(dbfile):
    _db = sqlite3.connect(dbfile, timeout=TIMEOUT)
    _db.execute("pragma synchronous = off")
    _db.execute("pragma journal_mode = WAL")
    _db.text_factory = str
    return _db


# We need to keep a process-wide fd open for all access to the lock file.
# Because POSIX lock files are insane, if you close *one* fd pointing
# at a given inode, it will immediately release *all* locks on that inode from
# your pid, even if those locks are on a different fd. This is literally
# never what you want. To avoid the problem, always use just a single fd.
_lockfile = None


_db = None
def db():
    global _db, _lockfile
    if _db:
        return _db

    dbdir = '%s/.redo' % vars.BASE
    dbfile = '%s/db.sqlite3' % dbdir
    try:
        os.mkdir(dbdir)
    except OSError, e:
        if e.errno == errno.EEXIST:
            pass  # if it exists, that's okay
        else:
            raise

    _lockfile = os.open(os.path.join(vars.BASE, '.redo/locks'),
                        os.O_RDWR | os.O_CREAT, 0666)
    close_on_exec(_lockfile, True)

    must_create = not os.path.exists(dbfile)
    if not must_create:
        _db = _connect(dbfile)
        try:
            row = _db.cursor().execute("select version from Schema").fetchone()
        except sqlite3.OperationalError:
            row = None
        ver = row and row[0] or None
        if ver != SCHEMA_VER:
            # Don't use err() here because this might happen before
            # redo-log spawns.
            sys.stderr.write(
                'redo: %s: found v%s (expected v%s)\n'
                % (dbfile, ver, SCHEMA_VER))
            sys.stderr.write('redo: manually delete .redo dir to start over.\n')
            sys.exit(1)
    if must_create:
        unlink(dbfile)
        _db = _connect(dbfile)
        _db.execute("create table Schema "
                    " (version int)")
        _db.execute("create table Runid "
                    " (id integer primary key autoincrement)")
        _db.execute("create table Files "
                    " (name not null primary key, "
                    " is_generated int, "
                    " is_override int, "
                    " checked_runid int, "
                    " changed_runid int, "
                    " failed_runid int, "
                    " stamp, "
                    " csum)")
        _db.execute("create table Deps "
                    " (target int, "
                    " source int, "
                    " mode not null, "
                    " delete_me int, "
                    " primary key (target,source))")
        _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER])
        # eat the '0' runid and File id.
        # Because of the cheesy way t/flush-cache is implemented, leave a
        # lot of runids available before the "first" one so that we
        # can adjust cached values to be before the first value.
        _db.execute("insert into Runid values (1000000000)")
        _db.execute("insert into Files (name) values (?)", [ALWAYS])

    if not vars.RUNID:
        _db.execute("insert into Runid values "
                    " ((select max(id)+1 from Runid))")
        vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0]
        os.environ['REDO_RUNID'] = str(vars.RUNID)

    _db.commit()
    return _db


def init():
    db()


_wrote = 0
def _write(q, l):
    if _insane:
        return
    global _wrote
    _wrote += 1
    db().execute(q, l)


def commit():
    if _insane:
        return
    global _wrote
    if _wrote:
        db().commit()
        _wrote = 0


def rollback():
    if _insane:
        return
    global _wrote
    if _wrote:
        db().rollback()
        _wrote = 0


def is_flushed():
    return not _wrote


_insane = None
def check_sane():
    global _insane
    if not _insane:
        _insane = not os.path.exists('%s/.redo' % vars.BASE)
    return not _insane


_cwd = None
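# Return t's path, relative to the directory base. t is first resolved
# against our saved cwd, so e.g. (hypothetical paths) relpath('/a/b/c/d',
# '/a/b/x') would return '../c/d'.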
def relpath(t, base):
    global _cwd
    if not _cwd:
        _cwd = os.getcwd()
    t = os.path.normpath(os.path.join(_cwd, t))
    base = os.path.normpath(base)
    tparts = t.split('/')
    bparts = base.split('/')
    for tp, bp in zip(tparts, bparts):
        if tp != bp:
            break
        tparts.pop(0)
        bparts.pop(0)
    while bparts:
        tparts.insert(0, '..')
        bparts.pop(0)
    return join('/', tparts)


# Return a path for t, if cwd were the dirname of vars.TARGET.
# This is tricky! STARTDIR+PWD is the directory for the *dofile*, when
# the dofile was started. However, inside the dofile, someone may have done
# a chdir to anywhere else. vars.TARGET is relative to the dofile path, so
# we have to first figure out where the dofile was, then find TARGET relative
# to that, then find t relative to that.
#
# FIXME: find some cleaner terminology for all these different paths.
def target_relpath(t):
    dofile_dir = os.path.abspath(os.path.join(vars.STARTDIR, vars.PWD))
    target_dir = os.path.abspath(
        os.path.dirname(os.path.join(dofile_dir, vars.TARGET)))
    return relpath(t, target_dir)


def detect_override(stamp1, stamp2):
    """Determine if two stamps differ in a way that means manual override.

    When two stamps differ at all, that means the source is dirty and so we
    need to rebuild. If they differ in mtime or size, then someone has surely
    edited the file, and we don't want to trample their changes.

    But if the only difference is something else (like ownership, st_mode,
    etc) then that might be a false positive; it's annoying to mark as
    overridden in that case, so we return False. (It's still dirty though!)
    """
    if stamp1 == stamp2:
        return False
    crit1 = stamp1.split('-', 2)[0:2]
    crit2 = stamp2.split('-', 2)[0:2]
    return crit1 != crit2


def warn_override(name):
    warn('%s - you modified it; skipping\n' % name)


_file_cols = ['rowid', 'name', 'is_generated', 'is_override',
              'checked_runid', 'changed_runid', 'failed_runid',
              'stamp', 'csum']
class File(object):
    # use this mostly to avoid accidentally assigning to typos
    __slots__ = ['id'] + _file_cols[1:]

    # These warnings are a result of the weird way this class is
    # initialized, which we should fix, and then re-enable warning.
    # pylint: disable=attribute-defined-outside-init
    def _init_from_idname(self, id, name, allow_add):
        q = ('select %s from Files ' % join(', ', _file_cols))
        if id != None:
            q += 'where rowid=?'
            l = [id]
        elif name != None:
            name = (name == ALWAYS) and ALWAYS or relpath(name, vars.BASE)
            q += 'where name=?'
            l = [name]
        else:
            raise Exception('name or id must be set')
        d = db()
        row = d.execute(q, l).fetchone()
        if not row:
            if not name:
                raise KeyError('No file with id=%r name=%r' % (id, name))
            elif not allow_add:
                raise KeyError('No file with name=%r' % (name,))
            try:
                _write('insert into Files (name) values (?)', [name])
            except sqlite3.IntegrityError:
                # some parallel redo probably added it at the same time; no
                # big deal.
                pass
            row = d.execute(q, l).fetchone()
            assert row
        return self._init_from_cols(row)

    def _init_from_cols(self, cols):
        (self.id, self.name, self.is_generated, self.is_override,
         self.checked_runid, self.changed_runid, self.failed_runid,
         self.stamp, self.csum) = cols
        if self.name == ALWAYS and self.changed_runid < vars.RUNID:
            self.changed_runid = vars.RUNID

    def __init__(self, id=None, name=None, cols=None, allow_add=True):
        if cols:
            self._init_from_cols(cols)
        else:
            self._init_from_idname(id, name, allow_add=allow_add)

    def __repr__(self):
        return "File(%r)" % (self.nicename(),)

    def refresh(self):
        self._init_from_idname(self.id, None, allow_add=False)

    def save(self):
        cols = join(', ', ['%s=?'%i for i in _file_cols[2:]])
        _write('update Files set '
               ' %s '
               ' where rowid=?' % cols,
               [self.is_generated, self.is_override,
                self.checked_runid, self.changed_runid, self.failed_runid,
                self.stamp, self.csum,
                self.id])

    def set_checked(self):
        self.checked_runid = vars.RUNID

    def set_checked_save(self):
        self.set_checked()
        self.save()

    def set_changed(self):
        debug2('BUILT: %r (%r)\n' % (self.name, self.stamp))
        self.changed_runid = vars.RUNID
        self.failed_runid = None
        self.is_override = False

    def set_failed(self):
        debug2('FAILED: %r\n' % self.name)
        self.update_stamp()
        self.failed_runid = vars.RUNID
        if self.stamp != STAMP_MISSING:
            # if we failed and the target file still exists,
            # then we're generated.
            self.is_generated = True
        else:
            # if the target file now does *not* exist, then go back to
            # treating this as a source file. Since it doesn't exist,
            # if someone tries to rebuild it immediately, it'll go
            # back to being a target. But if the file is manually
            # created before that, we don't need a "manual override"
            # warning.
            self.is_generated = False

    def set_static(self):
        self.update_stamp(must_exist=True)
        self.failed_runid = None
        self.is_override = False
        self.is_generated = False

    def set_override(self):
        self.update_stamp()
        self.failed_runid = None
        self.is_override = True

    def update_stamp(self, must_exist=False):
        newstamp = self.read_stamp()
        if must_exist and newstamp == STAMP_MISSING:
            raise Exception("%r does not exist" % self.name)
        if newstamp != self.stamp:
            debug2("STAMP: %s: %r -> %r\n" % (self.name, self.stamp, newstamp))
            self.stamp = newstamp
            self.set_changed()

    def is_source(self):
        if self.name.startswith('//'):
            return False  # special name, ignore
        newstamp = self.read_stamp()
        if (self.is_generated and
            (not self.is_failed() or newstamp != STAMP_MISSING) and
            not self.is_override and
            self.stamp == newstamp):
            # target is as we left it
            return False
        if ((not self.is_generated or self.stamp != newstamp) and
            newstamp == STAMP_MISSING):
            # target has gone missing after the last build.
            # It's not usefully a source *or* a target.
            return False
        return True

    def is_target(self):
        if not self.is_generated:
            return False
        if self.is_source():
            return False
        return True

    def is_checked(self):
        return self.checked_runid and self.checked_runid >= vars.RUNID

    def is_changed(self):
        return self.changed_runid and self.changed_runid >= vars.RUNID

    def is_failed(self):
        return self.failed_runid and self.failed_runid >= vars.RUNID

    def deps(self):
        if self.is_override or not self.is_generated:
            return
        q = ('select Deps.mode, Deps.source, %s '
             ' from Files '
             ' join Deps on Files.rowid = Deps.source '
             ' where target=?' % join(', ', _file_cols[1:]))
        for row in db().execute(q, [self.id]).fetchall():
            mode = row[0]
            cols = row[1:]
            assert mode in ('c', 'm')
            yield mode, File(cols=cols)

    def zap_deps1(self):
        debug2('zap-deps1: %r\n' % self.name)
        _write('update Deps set delete_me=? where target=?', [True, self.id])

    def zap_deps2(self):
        debug2('zap-deps2: %r\n' % self.name)
        _write('delete from Deps where target=? and delete_me=1', [self.id])

    def add_dep(self, mode, dep):
        src = File(name=dep)
        debug3('add-dep: "%s" < %s "%s"\n' % (self.name, mode, src.name))
        assert self.id != src.id
        _write("insert or replace into Deps "
               " (target, mode, source, delete_me) values (?,?,?,?)",
               [self.id, mode, src.id, False])

    def _read_stamp_st(self, statfunc):
        try:
            st = statfunc(os.path.join(vars.BASE, self.name))
        except OSError:
            return False, STAMP_MISSING
        if stat.S_ISDIR(st.st_mode):
            # directories change too much; detect only existence.
            return False, STAMP_DIR
        else:
            # a "unique identifier" stamp for a regular file
            return (
                stat.S_ISLNK(st.st_mode),
                '-'.join(str(s) for s in
                         ('%.6f' % st.st_mtime, st.st_size, st.st_ino,
                          st.st_mode, st.st_uid, st.st_gid))
            )

    def read_stamp(self):
        is_link, pre = self._read_stamp_st(os.lstat)
        if is_link:
            # if we're a symlink, we actually care about the link object
            # itself, *and* the target of the link. If either changes,
            # we're considered dirty.
            #
            # On the other hand, detect_override() doesn't care about the
            # target of the link, only the link itself.
            _, post = self._read_stamp_st(os.stat)
            return pre + '+' + post
        else:
            return pre

    def nicename(self):
        return relpath(os.path.join(vars.BASE, self.name), vars.STARTDIR)


def files():
    q = ('select %s from Files order by name' % join(', ', _file_cols))
    for cols in db().execute(q).fetchall():
        yield File(cols=cols)


def logname(fid):
    """Given the id of a File, return the filename of its build log."""
    return os.path.join(vars.BASE, '.redo', 'log.%d' % fid)


# FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python
# doesn't do the lockdata structure in a portable way, so we have to use
# fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's
# ok, but it doesn't have F_GETLK, so we can't report which pid owns the lock.
# That makes debugging a bit harder. When we someday port to C, we can do that.
_locks = {}
class Lock(object):
    def __init__(self, fid):
        self.owned = False
        self.fid = fid
        assert _lockfile >= 0
        assert _locks.get(fid, 0) == 0
        _locks[fid] = 1

    def __del__(self):
        _locks[self.fid] = 0
        if self.owned:
            self.unlock()

    def check(self):
        assert not self.owned
        if str(self.fid) in vars.get_locks():
            # Lock already held by parent: cyclic dependency
            raise CyclicDependencyError()

    def trylock(self):
        self.check()
        assert not self.owned
        try:
            fcntl.lockf(_lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 1, self.fid)
        except IOError, e:
            if e.errno in (errno.EAGAIN, errno.EACCES):
                pass  # someone else has it locked
            else:
                raise
        else:
            self.owned = True
        return self.owned

    def waitlock(self, shared=False):
        self.check()
        assert not self.owned
        fcntl.lockf(
            _lockfile,
            fcntl.LOCK_SH if shared else fcntl.LOCK_EX,
            1, self.fid)
        self.owned = True

    def unlock(self):
        if not self.owned:
            raise Exception("can't unlock %r - we don't own it"
                            % self.fid)
        fcntl.lockf(_lockfile, fcntl.LOCK_UN, 1, self.fid)
        self.owned = False