Switch to using a separate lockfile per target.
The previous method, using fcntl byterange locks, was very efficient and avoided unnecessarily filesystem metadata churn (ie. creating/deleting inodes). Unfortunately, MacOS X (at least version 10.6.5) apparently has a race condition in its fcntl locking that makes it unusably unreliable (http://apenwarr.ca/log/?m=201012#13). My tests indicate that if you only ever lock a *single* byterange on a file, the race condition doesn't cause a problem. So let's just use one lockfile per target. Now "redo -j20 test" passes for me on both MacOS and Linux. This doesn't measurably affect the speed on Linux, at least, in my tests. The bad news: it's hard to safely *delete* those lockfiles when we're done with them, so they tend to accumulate in the .redo dir.
This commit is contained in:
parent
294945bd0f
commit
95680ed7ef
2 changed files with 13 additions and 10 deletions
18
state.py
18
state.py
|
|
@ -19,9 +19,8 @@ def _connect(dbfile):
|
||||||
|
|
||||||
|
|
||||||
_db = None
|
_db = None
|
||||||
_lockfile = None
|
|
||||||
def db():
|
def db():
|
||||||
global _db, _lockfile
|
global _db
|
||||||
if _db:
|
if _db:
|
||||||
return _db
|
return _db
|
||||||
|
|
||||||
|
|
@ -35,10 +34,6 @@ def db():
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
_lockfile = os.open(os.path.join(vars.BASE, '.redo/locks'),
|
|
||||||
os.O_RDWR | os.O_CREAT, 0666)
|
|
||||||
close_on_exec(_lockfile, True)
|
|
||||||
|
|
||||||
must_create = not os.path.exists(dbfile)
|
must_create = not os.path.exists(dbfile)
|
||||||
if not must_create:
|
if not must_create:
|
||||||
_db = _connect(dbfile)
|
_db = _connect(dbfile)
|
||||||
|
|
@ -296,19 +291,22 @@ class Lock:
|
||||||
def __init__(self, fid):
|
def __init__(self, fid):
|
||||||
self.owned = False
|
self.owned = False
|
||||||
self.fid = fid
|
self.fid = fid
|
||||||
assert(_lockfile >= 0)
|
self.lockfile = os.open(os.path.join(vars.BASE, '.redo/lock.%d' % fid),
|
||||||
|
os.O_RDWR | os.O_CREAT, 0666)
|
||||||
|
close_on_exec(self.lockfile, True)
|
||||||
assert(_locks.get(fid,0) == 0)
|
assert(_locks.get(fid,0) == 0)
|
||||||
_locks[fid] = 1
|
_locks[fid] = 1
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
_locks[self.fid] = 0
|
_locks[self.fid] = 0
|
||||||
|
os.close(self.lockfile)
|
||||||
if self.owned:
|
if self.owned:
|
||||||
self.unlock()
|
self.unlock()
|
||||||
|
|
||||||
def trylock(self):
|
def trylock(self):
|
||||||
assert(not self.owned)
|
assert(not self.owned)
|
||||||
try:
|
try:
|
||||||
fcntl.lockf(_lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 1, self.fid)
|
fcntl.lockf(self.lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 0, 0)
|
||||||
except IOError, e:
|
except IOError, e:
|
||||||
if e.errno in (errno.EAGAIN, errno.EACCES):
|
if e.errno in (errno.EAGAIN, errno.EACCES):
|
||||||
pass # someone else has it locked
|
pass # someone else has it locked
|
||||||
|
|
@ -319,12 +317,12 @@ class Lock:
|
||||||
|
|
||||||
def waitlock(self):
|
def waitlock(self):
|
||||||
assert(not self.owned)
|
assert(not self.owned)
|
||||||
fcntl.lockf(_lockfile, fcntl.LOCK_EX, 1, self.fid)
|
fcntl.lockf(self.lockfile, fcntl.LOCK_EX, 0, 0)
|
||||||
self.owned = True
|
self.owned = True
|
||||||
|
|
||||||
def unlock(self):
|
def unlock(self):
|
||||||
if not self.owned:
|
if not self.owned:
|
||||||
raise Exception("can't unlock %r - we don't own it"
|
raise Exception("can't unlock %r - we don't own it"
|
||||||
% self.lockname)
|
% self.lockname)
|
||||||
fcntl.lockf(_lockfile, fcntl.LOCK_UN, 1, self.fid)
|
fcntl.lockf(self.lockfile, fcntl.LOCK_UN, 0, 0)
|
||||||
self.owned = False
|
self.owned = False
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,9 @@
|
||||||
echo n2-$1
|
echo n2-$1
|
||||||
echo $1 >>$1.count
|
echo $1 >>$1.count
|
||||||
echo $1 >>in.countall
|
echo $1 >>in.countall
|
||||||
|
|
||||||
|
# we deliberately use 'redo' here instead of redo-ifchange, because this *heavily*
|
||||||
|
# stresses redo's locking when building in parallel. We end up with 100
|
||||||
|
# different targets that all not only depend on this file, but absolutely must
|
||||||
|
# acquire the lock on this file, build it atomically, and release the lock.
|
||||||
redo countall
|
redo countall
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue