Switch state.py to use sqlite3 instead of filesystem-based stamps.

It passes all tests when run serialized, but still gives weird errors
(OperationalError: database is locked) when run with -j5.  sqlite3 shouldn't
be barfing just because the database is locked, since the default timeout is
5 seconds, and it's dying *way* faster than that.
This commit is contained in:
Avery Pennarun 2010-12-07 02:17:22 -08:00
commit a62bd50d44
6 changed files with 220 additions and 157 deletions

277
state.py
View file

@ -1,20 +1,71 @@
import sys, os, errno, glob
import sys, os, errno, glob, stat, sqlite3
import vars
from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec
SCHEMA_VER=7
_db = None
def db():
global _db
if _db:
return _db
dbdir = '%s/.redo' % vars.BASE
dbfile = '%s/db.sqlite3' % dbdir
mkdirp(dbdir)
must_create = not os.path.exists(dbfile)
if not must_create:
_db = sqlite3.connect(dbfile)
try:
row = _db.cursor().execute("select version from Schema").fetchone()
except sqlite3.OperationalError:
row = None
ver = row and row[0] or None
if ver != SCHEMA_VER:
err("state database: discarding v%s (wanted v%s)\n"
% (ver, SCHEMA_VER))
must_create = True
_db = None
if must_create:
unlink(dbfile)
_db = sqlite3.connect(dbfile)
_db.execute("create table Schema (version int)")
_db.execute("create table Runid "
" (id integer primary key autoincrement)")
_db.execute("create table Files ("
" name not null primary key, "
" is_generated int, "
" checked_runid int, "
" changed_runid int, "
" stamp, csum)")
_db.execute("create table Deps "
" (target int, source int, mode not null, primary key (target,source))")
#_db.execute("create unique index Files_name on Files (name)")
#_db.execute("create unique index Deps_ix on Deps (target, source)")
_db.execute("create index Deps_src on Deps (source)")
_db.execute("insert into Schema (version) values (?)", [SCHEMA_VER])
_db.execute("insert into Runid default values")
_db.execute("insert into Runid default values")
_db.commit()
if not vars.RUNID:
_db.execute("insert into Runid default values")
_db.commit()
vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0]
os.environ['REDO_RUNID'] = str(vars.RUNID)
_db.execute("pragma journal_mode = PERSIST")
_db.execute("pragma synchronous = off")
return _db
def init():
# FIXME: just wiping out all the locks is kind of cheating. But we
# only do this from the toplevel redo process, so unless the user
# deliberately starts more than one redo on the same repository, it's
# sort of ok.
mkdirp('%s/.redo' % vars.BASE)
db()
for f in glob.glob('%s/.redo/lock*' % vars.BASE):
os.unlink(f)
for f in glob.glob('%s/.redo/mark^*' % vars.BASE):
os.unlink(f)
for f in glob.glob('%s/.redo/built^*' % vars.BASE):
os.unlink(f)
_insane = None
@ -46,7 +97,7 @@ def relpath(t, base):
return '/'.join(tparts)
def _sname(typ, t):
def xx_sname(typ, t):
# FIXME: t.replace(...) is non-reversible and non-unique here!
tnew = relpath(t, vars.BASE)
v = vars.BASE + ('/.redo/%s^%s' % (typ, tnew.replace('/', '^')))
@ -55,128 +106,120 @@ def _sname(typ, t):
return v
def add_dep(t, mode, dep):
sn = _sname('dep', t)
reldep = relpath(dep, vars.BASE)
debug2('add-dep: %r < %s %r\n' % (sn, mode, reldep))
class File(object):
__slots__ = ['id', 'name', 'is_generated',
'checked_runid', 'changed_runid',
'stamp', 'csum']
open(sn, 'a').write('%s %s\n' % (mode, reldep))
def deps(t):
for line in open(_sname('dep', t)).readlines():
assert(line[0] in ('c','m'))
assert(line[1] == ' ')
assert(line[-1] == '\n')
mode = line[0]
name = line[2:-1]
yield mode,name
def _stampname(t):
return _sname('stamp', t)
def stamp(t):
mark(t)
stampfile = _stampname(t)
newstampfile = _sname('stamp' + str(os.getpid()), t)
depfile = _sname('dep', t)
if not os.path.exists(vars.BASE + '/.redo'):
# .redo might not exist in a 'make clean' target
return
open(newstampfile, 'w').close()
try:
mtime = os.stat(t).st_mtime
except OSError:
mtime = 0
os.utime(newstampfile, (mtime, mtime))
os.rename(newstampfile, stampfile)
open(depfile, 'a').close()
def unstamp(t):
unlink(_stampname(t))
unlink(_sname('dep', t))
def unmark_as_generated(t):
unstamp(t)
unlink(_sname('gen', t))
def stamped(t):
try:
stamptime = os.stat(_stampname(t)).st_mtime
except OSError, e:
if e.errno == errno.ENOENT:
return None
def __init__(self, id=None, name=None):
q = ('select rowid, name, is_generated, checked_runid, changed_runid, '
' stamp, csum '
' from Files ')
if id != None:
q += 'where rowid=?'
l = [id]
elif name != None:
name = relpath(name, vars.BASE)
q += 'where name=?'
l = [name]
else:
raise
return stamptime
raise Exception('name or id must be set')
d = db()
row = d.execute(q, l).fetchone()
if not row:
if not name:
raise Exception('File with id=%r not found and '
'name not given' % id)
d.execute('insert into Files (name) values (?)', [name])
d.commit()
row = d.execute(q, l).fetchone()
assert(row)
(self.id, self.name, self.is_generated,
self.checked_runid, self.changed_runid,
self.stamp, self.csum) = row
def save(self):
if not os.path.exists('%s/.redo' % vars.BASE):
# this might happen if 'make clean' removes the .redo dir
return
d = db()
d.execute('update Files set '
' is_generated=?, checked_runid=?, changed_runid=?, '
' stamp=?, csum=? '
' where rowid=?',
[self.is_generated, self.checked_runid, self.changed_runid,
self.stamp, self.csum,
self.id])
d.commit()
def built(t):
try:
open(_sname('built', t), 'w').close()
except IOError, e:
if e.errno == errno.ENOENT:
pass # may happen if someone deletes our .redo dir
else:
raise
_builts = {}
def isbuilt(t):
if _builts.get(t):
return True
if os.path.exists(_sname('built', t)):
_builts[t] = True
return True
# stamps the given input file, but only considers it to have been "built" if its
# mtime has changed. This is useful for static (non-generated) files.
def stamp_and_maybe_built(t):
if stamped(t) != os.stat(t).st_mtime:
built(t)
stamp(t)
def set_checked(self):
self.checked_runid = vars.RUNID
def mark(t):
try:
open(_sname('mark', t), 'w').close()
except IOError, e:
if e.errno == errno.ENOENT:
pass # may happen if someone deletes our .redo dir
def set_changed(self):
debug2('BUILT: %r (%r)\n' % (self.name, self.stamp))
self.changed_runid = vars.RUNID
def set_static(self):
self.update_stamp()
def update_stamp(self):
newstamp = self.read_stamp()
if newstamp != self.stamp:
debug2("STAMP: %s: %r -> %r\n" % (self.name, self.stamp, newstamp))
self.stamp = newstamp
self.set_changed()
def is_changed(self):
return self.changed_runid and self.changed_runid >= vars.RUNID
def is_checked(self):
return (self.checked_runid and self.checked_runid >= vars.RUNID
and not (self.changed_runid
and self.changed_runid >= self.checked_runid))
def deps(self):
q = "select mode, source from Deps where target=?"
for mode,source_id in db().execute(q, [self.id]):
assert(mode in ('c', 'm'))
name = File(id=source_id).name
yield mode,name
def zap_deps(self):
debug2('zap-deps: %r\n' % self.name)
db().execute('delete from Deps where target=?', [self.id])
db().commit()
def add_dep(self, mode, dep):
src = File(name=dep)
reldep = relpath(dep, vars.BASE)
debug2('add-dep: %r < %s %r\n' % (self.name, mode, reldep))
assert(src.name == reldep)
d = db()
d.execute("delete from Deps where target=? and source=?",
[self.id, src.id])
d.execute("insert into Deps "
" (target, mode, source) values (?,?,?)",
[self.id, mode, src.id])
d.commit()
def read_stamp(self):
try:
st = os.stat(os.path.join(vars.BASE, self.name))
except OSError:
return '0' # does not exist
if stat.S_ISDIR(st.st_mode):
return 'dir' # the timestamp of a directory is meaningless
else:
raise
_marks = {}
def ismarked(t):
if _marks.get(t):
return True
if os.path.exists(_sname('mark', t)):
_marks[t] = True
return True
def is_generated(t):
return os.path.exists(_sname('gen', t))
def start(t):
unstamp(t)
open(_sname('dep', t), 'w').close()
open(_sname('gen', t), 'w').close() # it's definitely a generated file
# a "unique identifier" stamp for a regular file
return str((st.st_ctime, st.st_mtime, st.st_size, st.st_ino))
class Lock:
def __init__(self, t):
self.owned = False
self.rfd = self.wfd = None
self.lockname = _sname('lock', t)
self.lockname = xx_sname('lock', t)
def __del__(self):
if self.owned: