Switch state.py to use sqlite3 instead of filesystem-based stamps.

It passes all tests when run serialized, but still gives weird errors
(OperationalError: database is locked) when run with -j5.  sqlite3 shouldn't
be barfing just because the database is locked, since the default timeout is
5 seconds, and it's dying *way* faster than that.
This commit is contained in:
Avery Pennarun 2010-12-07 02:17:22 -08:00
commit a62bd50d44
6 changed files with 220 additions and 157 deletions

View file

@ -20,10 +20,10 @@ def _find_do_file(t):
for dofile,basename,ext in _possible_do_files(t):
debug2('%s: %s ?\n' % (t, dofile))
if os.path.exists(dofile):
state.add_dep(t, 'm', dofile)
state.File(name=t).add_dep('m', dofile)
return dofile,basename,ext
else:
state.add_dep(t, 'c', dofile)
state.File(name=t).add_dep('c', dofile)
return None,None,None
@ -53,12 +53,13 @@ class BuildJob:
def start(self):
assert(self.lock.owned)
t = self.t
f = state.File(name=t)
tmpname = self.tmpname
if not self.shouldbuildfunc(t):
# target doesn't need to be built; skip the whole task
return self._after2(0)
if (os.path.exists(t) and not os.path.exists(t + '/.')
and not state.is_generated(t)):
and not f.is_generated):
# an existing source file that was not generated by us.
# This step is mentioned by djb in his notes.
# For example, a rule called default.c.do could be used to try
@ -67,20 +68,21 @@ class BuildJob:
# FIXME: always refuse to redo any file that was modified outside
# of redo? That would make it easy for someone to override a
# file temporarily, and could be undone by deleting the file.
state.unmark_as_generated(t)
state.stamp_and_maybe_built(t)
debug2("-- static (%r)\n" % t)
f.set_static()
f.save()
return self._after2(0)
state.start(t)
f.zap_deps()
(dofile, basename, ext) = _find_do_file(t)
if not dofile:
if os.path.exists(t):
state.unmark_as_generated(t)
state.stamp_and_maybe_built(t)
f.is_generated = False
f.set_static()
f.save()
return self._after2(0)
else:
err('no rule to make %r\n' % t)
return self._after2(1)
state.stamp_and_maybe_built(dofile)
unlink(tmpname)
ffd = os.open(tmpname, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666)
close_on_exec(ffd, True)
@ -97,13 +99,19 @@ class BuildJob:
if vars.VERBOSE or vars.XTRACE: log_('\n')
log('%s\n' % _nice(t))
self.argv = argv
f.is_generated = True
f.save()
dof = state.File(name=dofile)
dof.set_static()
dof.save()
jwack.start_job(t, self._do_subproc, self._after)
def _do_subproc(self):
# careful: REDO_PWD was the PWD relative to the STARTPATH at the time
# we *started* building the current target; but that target ran
# redo-ifchange, and it might have done it from a different directory
# than we started it in. So os.getcwd() might be != REDO_PWD right now.
# than we started it in. So os.getcwd() might be != REDO_PWD right
# now.
dn = os.path.dirname(self.t)
newp = os.path.realpath(dn)
os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR)
@ -153,11 +161,17 @@ class BuildJob:
os.rename(tmpname, t)
else:
unlink(tmpname)
state.built(t)
state.stamp(t)
sf = state.File(name=t)
sf.is_generated=True
sf.update_stamp()
sf.set_changed()
sf.save()
else:
unlink(tmpname)
state.unstamp(t)
sf = state.File(name=t)
sf.stamp = None
sf.set_changed()
sf.save()
f.close()
if rv != 0:
err('%s: exit code %d\n' % (_nice(t),rv))
@ -226,7 +240,7 @@ def main(targets, shouldbuildfunc):
assert(lock.owned)
if vars.DEBUG_LOCKS:
log('%s (...unlocked!)\n' % _nice(t))
if state.stamped(t) == None:
if state.File(name=t).stamp == None:
err('%s: failed in another thread\n' % _nice(t))
retcode[0] = 2
lock.unlock()

View file

@ -4,58 +4,59 @@ import vars, state, builder, jwack
from helpers import debug, debug2, err, mkdirp, unlink
def dirty_deps(t, depth):
try:
st = os.stat(t)
realtime = st.st_mtime
except OSError:
st = None
realtime = 0
def dirty_deps(f, depth, max_changed):
debug('%s?%s\n' % (depth, f.name))
debug('%s?%s\n' % (depth, t))
if state.isbuilt(t):
if f.changed_runid == None:
debug('%s-- DIRTY (never built)\n' % depth)
return True
if f.changed_runid > max_changed:
debug('%s-- DIRTY (built)\n' % depth)
return True # has already been built during this session
if state.ismarked(t):
debug('%s-- CLEAN (marked)\n' % depth)
return True # has been built more recently than parent
if f.is_checked():
debug('%s-- CLEAN (checked)\n' % depth)
return False # has already been checked during this session
stamptime = state.stamped(t)
if stamptime == None:
if not f.stamp:
debug('%s-- DIRTY (no stamp)\n' % depth)
return True
if stamptime != realtime and not (st and stat.S_ISDIR(st.st_mode)):
if f.stamp != f.read_stamp():
debug('%s-- DIRTY (mtime)\n' % depth)
return True
for mode,name in state.deps(t):
for mode,name in f.deps():
if mode == 'c':
if os.path.exists(name):
debug('%s-- DIRTY (created)\n' % depth)
return True
elif mode == 'm':
if dirty_deps(os.path.join(vars.BASE, name), depth + ' '):
f2 = state.File(name=os.path.join(vars.BASE, name))
if dirty_deps(f2, depth = depth + ' ',
max_changed = f.changed_runid):
debug('%s-- DIRTY (sub)\n' % depth)
state.unstamp(t) # optimization for future callers
return True
state.mark(t)
f.set_checked()
f.save()
return False
def should_build(t):
return not state.isbuilt(t) and dirty_deps(t, depth = '')
f = state.File(name=t)
return dirty_deps(f, depth = '', max_changed = vars.RUNID)
rv = 202
try:
me = os.path.join(vars.STARTDIR,
os.path.join(vars.PWD, vars.TARGET))
f = state.File(name=me)
debug2('TARGET: %r %r %r\n' % (vars.STARTDIR, vars.PWD, vars.TARGET))
try:
targets = sys.argv[1:]
for t in targets:
state.add_dep(me, 'm', t)
f.add_dep('m', t)
f.save()
rv = builder.main(targets, should_build)
finally:
jwack.force_return_tokens()

View file

@ -5,11 +5,12 @@ from helpers import err, mkdirp
try:
me = state.File(name=vars.TARGET)
for t in sys.argv[1:]:
if os.path.exists(t):
err('redo-ifcreate: error: %r already exists\n' % t)
sys.exit(1)
else:
state.add_dep(vars.TARGET, 'c', t)
me.add_dep('c', t)
except KeyboardInterrupt:
sys.exit(200)

277
state.py
View file

@ -1,20 +1,71 @@
import sys, os, errno, glob
import sys, os, errno, glob, stat, sqlite3
import vars
from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec
SCHEMA_VER=7
_db = None
def db():
    """Return the shared sqlite3 connection to the redo state database.

    The connection is opened on first use and cached in the module-level
    `_db`; later calls return the cached object.  The database lives at
    <vars.BASE>/.redo/db.sqlite3.  If the file is missing, or its Schema
    table does not report SCHEMA_VER, the file is deleted and recreated
    from scratch with empty tables.

    As a side effect, when vars.RUNID is not set yet, a new Runid row is
    allocated, stored in vars.RUNID and exported as $REDO_RUNID.
    """
    global _db
    if _db:
        return _db

    dbdir = '%s/.redo' % vars.BASE
    dbfile = '%s/db.sqlite3' % dbdir
    mkdirp(dbdir)

    must_create = not os.path.exists(dbfile)
    if not must_create:
        _db = sqlite3.connect(dbfile)
        try:
            row = _db.cursor().execute("select version from Schema").fetchone()
        except sqlite3.OperationalError:
            # no usable Schema table: treat it as an unknown version
            row = None
        ver = row and row[0] or None
        if ver != SCHEMA_VER:
            err("state database: discarding v%s (wanted v%s)\n"
                % (ver, SCHEMA_VER))
            must_create = True
            # Release the handle on the outdated file before it is unlinked
            # and reopened below, instead of leaving it to the garbage
            # collector.
            _db.close()
            _db = None

    if must_create:
        unlink(dbfile)
        _db = sqlite3.connect(dbfile)
        _db.execute("create table Schema (version int)")
        _db.execute("create table Runid "
                    " (id integer primary key autoincrement)")
        _db.execute("create table Files ("
                    " name not null primary key, "
                    " is_generated int, "
                    " checked_runid int, "
                    " changed_runid int, "
                    " stamp, csum)")
        _db.execute("create table Deps "
                    " (target int, source int, mode not null, primary key (target,source))")
        #_db.execute("create unique index Files_name on Files (name)")
        #_db.execute("create unique index Deps_ix on Deps (target, source)")
        _db.execute("create index Deps_src on Deps (source)")
        _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER])
        # NOTE(review): two Runid rows are consumed at creation time;
        # presumably to reserve the lowest run ids -- confirm intent.
        _db.execute("insert into Runid default values")
        _db.execute("insert into Runid default values")
        _db.commit()

    if not vars.RUNID:
        # First redo process of this run: allocate a fresh run id and hand
        # it to child processes through the environment.
        _db.execute("insert into Runid default values")
        _db.commit()
        vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0]
        os.environ['REDO_RUNID'] = str(vars.RUNID)

    _db.execute("pragma journal_mode = PERSIST")
    _db.execute("pragma synchronous = off")
    return _db
def init():
# FIXME: just wiping out all the locks is kind of cheating. But we
# only do this from the toplevel redo process, so unless the user
# deliberately starts more than one redo on the same repository, it's
# sort of ok.
mkdirp('%s/.redo' % vars.BASE)
db()
for f in glob.glob('%s/.redo/lock*' % vars.BASE):
os.unlink(f)
for f in glob.glob('%s/.redo/mark^*' % vars.BASE):
os.unlink(f)
for f in glob.glob('%s/.redo/built^*' % vars.BASE):
os.unlink(f)
_insane = None
@ -46,7 +97,7 @@ def relpath(t, base):
return '/'.join(tparts)
def _sname(typ, t):
def xx_sname(typ, t):
# FIXME: t.replace(...) is non-reversible and non-unique here!
tnew = relpath(t, vars.BASE)
v = vars.BASE + ('/.redo/%s^%s' % (typ, tnew.replace('/', '^')))
@ -55,128 +106,120 @@ def _sname(typ, t):
return v
def add_dep(t, mode, dep):
sn = _sname('dep', t)
reldep = relpath(dep, vars.BASE)
debug2('add-dep: %r < %s %r\n' % (sn, mode, reldep))
class File(object):
__slots__ = ['id', 'name', 'is_generated',
'checked_runid', 'changed_runid',
'stamp', 'csum']
open(sn, 'a').write('%s %s\n' % (mode, reldep))
def deps(t):
for line in open(_sname('dep', t)).readlines():
assert(line[0] in ('c','m'))
assert(line[1] == ' ')
assert(line[-1] == '\n')
mode = line[0]
name = line[2:-1]
yield mode,name
def _stampname(t):
return _sname('stamp', t)
def stamp(t):
mark(t)
stampfile = _stampname(t)
newstampfile = _sname('stamp' + str(os.getpid()), t)
depfile = _sname('dep', t)
if not os.path.exists(vars.BASE + '/.redo'):
# .redo might not exist in a 'make clean' target
return
open(newstampfile, 'w').close()
try:
mtime = os.stat(t).st_mtime
except OSError:
mtime = 0
os.utime(newstampfile, (mtime, mtime))
os.rename(newstampfile, stampfile)
open(depfile, 'a').close()
def unstamp(t):
unlink(_stampname(t))
unlink(_sname('dep', t))
def unmark_as_generated(t):
unstamp(t)
unlink(_sname('gen', t))
def stamped(t):
try:
stamptime = os.stat(_stampname(t)).st_mtime
except OSError, e:
if e.errno == errno.ENOENT:
return None
def __init__(self, id=None, name=None):
q = ('select rowid, name, is_generated, checked_runid, changed_runid, '
' stamp, csum '
' from Files ')
if id != None:
q += 'where rowid=?'
l = [id]
elif name != None:
name = relpath(name, vars.BASE)
q += 'where name=?'
l = [name]
else:
raise
return stamptime
raise Exception('name or id must be set')
d = db()
row = d.execute(q, l).fetchone()
if not row:
if not name:
raise Exception('File with id=%r not found and '
'name not given' % id)
d.execute('insert into Files (name) values (?)', [name])
d.commit()
row = d.execute(q, l).fetchone()
assert(row)
(self.id, self.name, self.is_generated,
self.checked_runid, self.changed_runid,
self.stamp, self.csum) = row
def save(self):
    """Write this object's mutable columns back to its Files row.

    Does nothing when the .redo directory no longer exists (for example
    after a 'make clean' style target removed it).
    """
    redo_dir = '%s/.redo' % vars.BASE
    if os.path.exists(redo_dir):
        conn = db()
        values = [self.is_generated, self.checked_runid, self.changed_runid,
                  self.stamp, self.csum,
                  self.id]
        conn.execute('update Files set '
                     ' is_generated=?, checked_runid=?, changed_runid=?, '
                     ' stamp=?, csum=? '
                     ' where rowid=?',
                     values)
        conn.commit()
def built(t):
try:
open(_sname('built', t), 'w').close()
except IOError, e:
if e.errno == errno.ENOENT:
pass # may happen if someone deletes our .redo dir
def set_checked(self):
    """Record the current run id as the run in which this file was checked.

    Only the in-memory attribute changes; nothing is written to the
    database here.
    """
    self.checked_runid = vars.RUNID
def set_changed(self):
    """Record the current run id as the run in which this file changed.

    Logs the file name and stamp at debug level 2 first.  Only the
    in-memory attribute changes; nothing is written to the database here.
    """
    debug2('BUILT: %r (%r)\n' % (self.name, self.stamp))
    self.changed_runid = vars.RUNID
def set_static(self):
    """Refresh the stamp of this file; currently just update_stamp()."""
    self.update_stamp()
def update_stamp(self):
    """Re-read the stamp and, if it differs, store it and mark as changed.

    When the freshly read stamp equals the stored one, nothing happens.
    """
    fresh = self.read_stamp()
    if fresh == self.stamp:
        return
    # log the transition before the old value is overwritten
    debug2("STAMP: %s: %r -> %r\n" % (self.name, self.stamp, fresh))
    self.stamp = fresh
    self.set_changed()
def is_changed(self):
    """Tell whether changed_runid is set and is >= the current run id.

    Returns the falsy changed_runid itself when it is unset, otherwise
    the result of the comparison.
    """
    runid = self.changed_runid
    if not runid:
        return runid
    return runid >= vars.RUNID
def is_checked(self):
    """Tell whether this file was checked in this run and not changed since.

    True only when checked_runid is set, is >= the current run id, and
    changed_runid is not set to a value >= checked_runid.  When
    checked_runid is unset, that falsy value itself is returned.
    """
    checked = self.checked_runid
    if not checked:
        return checked
    if not (checked >= vars.RUNID):
        return False
    changed = self.changed_runid
    return not (changed and changed >= checked)
def deps(self):
    """Yield (mode, name) for every dependency recorded for this file.

    mode is 'c' or 'm' as stored in the Deps table; name is the name of
    the source File row.

    All rows are fetched before the first item is yielded.  Callers do
    further queries, updates and commits on the same connection while
    they consume this generator, so the SELECT must already be finished
    by then rather than left open between yields.
    """
    q = "select mode, source from Deps where target=?"
    rows = db().execute(q, [self.id]).fetchall()
    for mode, source_id in rows:
        assert(mode in ('c', 'm'))
        name = File(id=source_id).name
        yield mode, name
def zap_deps(self):
    """Delete every Deps row whose target is this file, then commit."""
    debug2('zap-deps: %r\n' % self.name)
    conn = db()
    conn.execute('delete from Deps where target=?', [self.id])
    conn.commit()
def add_dep(self, mode, dep):
    """Record that this file depends on `dep` with the given mode.

    mode is stored as given in the Deps row.  Any existing row for the
    same (target, source) pair is removed first, so the new mode replaces
    the old one.  The change is committed immediately.
    """
    source = File(name=dep)
    rel = relpath(dep, vars.BASE)
    debug2('add-dep: %r < %s %r\n' % (self.name, mode, rel))
    assert(source.name == rel)
    conn = db()
    pair = [self.id, source.id]
    conn.execute("delete from Deps where target=? and source=?", pair)
    conn.execute("insert into Deps "
                 " (target, mode, source) values (?,?,?)",
                 [self.id, mode, source.id])
    conn.commit()
def read_stamp(self):
try:
st = os.stat(os.path.join(vars.BASE, self.name))
except OSError:
return '0' # does not exist
if stat.S_ISDIR(st.st_mode):
return 'dir' # the timestamp of a directory is meaningless
else:
raise
# a "unique identifier" stamp for a regular file
return str((st.st_ctime, st.st_mtime, st.st_size, st.st_ino))
_builts = {}
def isbuilt(t):
if _builts.get(t):
return True
if os.path.exists(_sname('built', t)):
_builts[t] = True
return True
# stamps the given input file, but only considers it to have been "built" if its
# mtime has changed. This is useful for static (non-generated) files.
def stamp_and_maybe_built(t):
if stamped(t) != os.stat(t).st_mtime:
built(t)
stamp(t)
def mark(t):
try:
open(_sname('mark', t), 'w').close()
except IOError, e:
if e.errno == errno.ENOENT:
pass # may happen if someone deletes our .redo dir
else:
raise
_marks = {}
def ismarked(t):
if _marks.get(t):
return True
if os.path.exists(_sname('mark', t)):
_marks[t] = True
return True
def is_generated(t):
return os.path.exists(_sname('gen', t))
def start(t):
unstamp(t)
open(_sname('dep', t), 'w').close()
open(_sname('gen', t), 'w').close() # it's definitely a generated file
class Lock:
def __init__(self, t):
self.owned = False
self.rfd = self.wfd = None
self.lockname = _sname('lock', t)
self.lockname = xx_sname('lock', t)
def __del__(self):
if self.owned:

View file

@ -1,4 +1,7 @@
#!/bin/sh
#echo "Flushing redo cache..." >&2
find "$REDO_BASE/.redo" -name 'built^*' -o -name 'mark^*' |
xargs rm -f >&2
(
echo "update Files set checked_runid=null;"
echo "update Files set changed_runid=changed_runid-1;"
#echo "update Files set stamp='dirty' where id in (select distinct target from Deps);"
) | sqlite3 "$REDO_BASE/.redo/db.sqlite3"

View file

@ -18,6 +18,7 @@ XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0
KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0
SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0
STARTDIR = os.environ['REDO_STARTDIR']
RUNID = atoi.atoi(os.environ.get('REDO_RUNID')) or None
BASE = os.environ['REDO_BASE']
while BASE and BASE.endswith('/'):
BASE = BASE[:-1]