From 84169c5d27cef1cdf17e59f1fee9e3e56747cb04 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 02:58:13 -0800 Subject: [PATCH] Change locking stuff from fifos to fcntl.lockf(). This should reduce filesystem grinding a bit, and makes the code simpler. It's also theoretically a bit more portable, since I'm guessing fifo semantics aren't the same on win32 if we ever get there. Also, a major problem with the old fifo-based system is that if a redo process died without cleaning up after itself, it wouldn't delete its lockfiles, so we had to wipe them all at the beginning of each build. Now we don't; in theory, you can now have multiple copies of redo poking at the same tree at the same time and not stepping on each other. --- builder.py | 49 ++++++++++++++++-------------- state.py | 88 ++++++++++++++++++++---------------------------------- 2 files changed, 58 insertions(+), 79 deletions(-) diff --git a/builder.py b/builder.py index 8896ea3..16695cf 100644 --- a/builder.py +++ b/builder.py @@ -43,8 +43,9 @@ def _try_stat(filename): class BuildJob: - def __init__(self, t, lock, shouldbuildfunc, donefunc): - self.t = t + def __init__(self, t, sf, lock, shouldbuildfunc, donefunc): + self.t = t # original target name, not relative to vars.BASE + self.sf = sf self.tmpname = '%s.redo.tmp' % t self.lock = lock self.shouldbuildfunc = shouldbuildfunc @@ -54,13 +55,13 @@ class BuildJob: def start(self): assert(self.lock.owned) t = self.t - f = state.File(name=t) + sf = self.sf tmpname = self.tmpname if not self.shouldbuildfunc(t): # target doesn't need to be built; skip the whole task return self._after2(0) if (os.path.exists(t) and not os.path.exists(t + '/.') - and not f.is_generated): + and not sf.is_generated): # an existing source file that was not generated by us. # This step is mentioned by djb in his notes. # For example, a rule called default.c.do could be used to try @@ -70,16 +71,16 @@ class BuildJob: # of redo? 
That would make it easy for someone to override a # file temporarily, and could be undone by deleting the file. debug2("-- static (%r)\n" % t) - f.set_static() - f.save() + sf.set_static() + sf.save() return self._after2(0) - f.zap_deps() - (dofile, basename, ext) = _find_do_file(f) + sf.zap_deps() + (dofile, basename, ext) = _find_do_file(sf) if not dofile: if os.path.exists(t): - f.is_generated = False - f.set_static() - f.save() + sf.is_generated = False + sf.set_static() + sf.save() return self._after2(0) else: err('no rule to make %r\n' % t) @@ -100,8 +101,8 @@ class BuildJob: if vars.VERBOSE or vars.XTRACE: log_('\n') log('%s\n' % _nice(t)) self.argv = argv - f.is_generated = True - f.save() + sf.is_generated = True + sf.save() dof = state.File(name=dofile) dof.set_static() dof.save() @@ -165,14 +166,14 @@ class BuildJob: os.rename(tmpname, t) else: unlink(tmpname) - sf = state.File(name=t) + sf = self.sf sf.is_generated=True sf.update_stamp() sf.set_changed() sf.save() else: unlink(tmpname) - sf = state.File(name=t) + sf = self.sf sf.stamp = None sf.set_changed() sf.save() @@ -219,18 +220,19 @@ def main(targets, shouldbuildfunc): err('.redo directory disappeared; cannot continue.\n') retcode[0] = 205 break - lock = state.Lock(t) + f = state.File(name=t) + lock = state.Lock(f.id) lock.trylock() if not lock.owned: if vars.DEBUG_LOCKS: log('%s (locked...)\n' % _nice(t)) - locked.append(t) + locked.append((f.id,t)) else: - BuildJob(t, lock, shouldbuildfunc, done).start() + BuildJob(t, f, lock, shouldbuildfunc, done).start() # Now we've built all the "easy" ones. Go back and just wait on the - # remaining ones one by one. This is technically non-optimal; we could - # use select.select() to wait on more than one at a time. But it should + # remaining ones one by one. This is non-optimal; we could go faster if + # we could wait on multiple locks at once. But it should # be rare enough that it doesn't matter, and the logic is easier this way. 
while locked or jwack.running(): state.commit() @@ -244,8 +246,8 @@ def main(targets, shouldbuildfunc): err('.redo directory disappeared; cannot continue.\n') retcode[0] = 205 break - t = locked.pop(0) - lock = state.Lock(t) + fid,t = locked.pop(0) + lock = state.Lock(fid) lock.waitlock() assert(lock.owned) if vars.DEBUG_LOCKS: @@ -255,6 +257,7 @@ def main(targets, shouldbuildfunc): retcode[0] = 2 lock.unlock() else: - BuildJob(t, lock, shouldbuildfunc, done).start() + BuildJob(t, state.File(id=fid), lock, + shouldbuildfunc, done).start() state.commit() return retcode[0] diff --git a/state.py b/state.py index e75a5b0..dc3a7c5 100644 --- a/state.py +++ b/state.py @@ -1,4 +1,4 @@ -import sys, os, errno, glob, stat, sqlite3 +import sys, os, errno, glob, stat, fcntl, sqlite3 import vars from helpers import unlink, err, debug2, debug3, close_on_exec import helpers @@ -14,10 +14,12 @@ def _connect(dbfile): _db = None +_lockfile = None def db(): - global _db + global _db, _lockfile if _db: return _db + dbdir = '%s/.redo' % vars.BASE dbfile = '%s/db.sqlite3' % dbdir try: @@ -27,6 +29,11 @@ def db(): pass # if it exists, that's okay else: raise + + _lockfile = os.open(os.path.join(vars.BASE, '.redo/locks'), + os.O_RDWR | os.O_CREAT, 0666) + close_on_exec(_lockfile, True) + must_create = not os.path.exists(dbfile) if not must_create: _db = _connect(dbfile) @@ -60,7 +67,9 @@ def db(): " mode not null, " " primary key (target,source))") _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER]) - _db.execute("insert into Runid default values") # eat the '0' runid + # eat the '0' runid and File id + _db.execute("insert into Runid default values") + _db.execute("insert into Files (name) values (?)", ['']) if not vars.RUNID: _db.execute("insert into Runid default values") @@ -72,13 +81,7 @@ def db(): def init(): - # FIXME: just wiping out all the locks is kind of cheating. 
But we - # only do this from the toplevel redo process, so unless the user - # deliberately starts more than one redo on the same repository, it's - # sort of ok. db() - for f in glob.glob('%s/.redo/lock*' % vars.BASE): - os.unlink(f) _wrote = 0 @@ -128,16 +131,8 @@ def relpath(t, base): return '/'.join(tparts) -def xx_sname(typ, t): - # FIXME: t.replace(...) is non-reversible and non-unique here! - tnew = relpath(t, vars.BASE) - v = vars.BASE + ('/.redo/%s^%s' % (typ, tnew.replace('/', '^'))) - if vars.DEBUG >= 3: - debug3('sname: (%r) %r -> %r\n' % (os.getcwd(), t, tnew)) - return v - - class File(object): + # use this mostly to avoid accidentally assigning to typos __slots__ = ['id', 'name', 'is_generated', 'checked_runid', 'changed_runid', 'stamp', 'csum'] @@ -247,62 +242,43 @@ class File(object): else: # a "unique identifier" stamp for a regular file return str((st.st_ctime, st.st_mtime, st.st_size, st.st_ino)) - +# FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python +# doesn't do the lockdata structure in a portable way, so we have to use +# fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's +# ok, but it doesn't have F_GETLK, so we can't report which pid owns the lock. +# That makes debugging a bit harder. When we someday port to C, we can do that. 
class Lock: - def __init__(self, t): + def __init__(self, fid): + assert(_lockfile >= 0) self.owned = False - self.rfd = self.wfd = None - self.lockname = xx_sname('lock', t) + self.fid = fid def __del__(self): if self.owned: self.unlock() def trylock(self): + assert(not self.owned) try: - os.mkfifo(self.lockname, 0600) - self.owned = True - self.rfd = os.open(self.lockname, os.O_RDONLY|os.O_NONBLOCK) - self.wfd = os.open(self.lockname, os.O_WRONLY) - close_on_exec(self.rfd, True) - close_on_exec(self.wfd, True) - except OSError, e: - if e.errno == errno.EEXIST: - pass + fcntl.lockf(_lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 1, self.fid) + except IOError, e: + if e.errno in (errno.EAGAIN, errno.EACCES): + pass # someone else has it locked else: raise + else: + self.owned = True def waitlock(self): - while not self.owned: - self.wait() - self.trylock() - assert(self.owned) + assert(not self.owned) + fcntl.lockf(_lockfile, fcntl.LOCK_EX, 1, self.fid) + self.owned = True def unlock(self): if not self.owned: raise Exception("can't unlock %r - we don't own it" % self.lockname) - unlink(self.lockname) - # ping any connected readers - os.close(self.rfd) - os.close(self.wfd) - self.rfd = self.wfd = None + fcntl.lockf(_lockfile, fcntl.LOCK_UN, 1, self.fid) self.owned = False - - def wait(self): - if self.owned: - raise Exception("can't wait on %r - we own it" % self.lockname) - try: - # open() will finish only when a writer exists and does close() - fd = os.open(self.lockname, os.O_RDONLY) - try: - os.read(fd, 1) - finally: - os.close(fd) - except OSError, e: - if e.errno == errno.ENOENT: - pass # it's not even unlocked or was unlocked earlier - else: - raise