state.py: reduce race condition between Lock.trylock() and unlock().

If 'redo clean' deletes the lockfile after trylock() succeeds but before
unlock(), then unlock() won't be able to open the pipe in order to release
readers, and any waiters might end up waiting forever.

We can't open the fifo for write until there's at least one reader, so let's
open a reader *just* to let us open a writer.  Then we'll leave them open
until the later unlock(), which can just close them both.
This commit is contained in:
Avery Pennarun 2010-11-22 03:21:17 -08:00
commit 2dbd47100d
3 changed files with 40 additions and 36 deletions

View file

@ -1,6 +1,6 @@
import sys, os, random, fcntl import sys, os, random
import vars, jwack, state import vars, jwack, state
from helpers import log, log_, debug2, err, unlink from helpers import log, log_, debug2, err, unlink, close_on_exec
def _possible_do_files(t): def _possible_do_files(t):
@ -31,14 +31,6 @@ def _nice(t):
return os.path.normpath(os.path.join(vars.PWD, t)) return os.path.normpath(os.path.join(vars.PWD, t))
def _close_on_exec(fd, yes):
fl = fcntl.fcntl(fd, fcntl.F_GETFD)
fl &= ~fcntl.FD_CLOEXEC
if yes:
fl |= fcntl.FD_CLOEXEC
fcntl.fcntl(fd, fcntl.F_SETFD, fl)
class BuildJob: class BuildJob:
def __init__(self, t, lock, shouldbuildfunc, donefunc): def __init__(self, t, lock, shouldbuildfunc, donefunc):
self.t = t self.t = t
@ -71,7 +63,7 @@ class BuildJob:
state.stamp(dofile) state.stamp(dofile)
unlink(tmpname) unlink(tmpname)
ffd = os.open(tmpname, os.O_CREAT|os.O_RDWR|os.O_EXCL) ffd = os.open(tmpname, os.O_CREAT|os.O_RDWR|os.O_EXCL)
_close_on_exec(ffd, True) close_on_exec(ffd, True)
self.f = os.fdopen(ffd, 'w+') self.f = os.fdopen(ffd, 'w+')
# this will run in the dofile's directory, so use only basenames here # this will run in the dofile's directory, so use only basenames here
argv = ['sh', '-e', argv = ['sh', '-e',
@ -97,12 +89,18 @@ class BuildJob:
os.chdir(dn) os.chdir(dn)
os.dup2(self.f.fileno(), 1) os.dup2(self.f.fileno(), 1)
os.close(self.f.fileno()) os.close(self.f.fileno())
_close_on_exec(1, False) close_on_exec(1, False)
os.execvp(self.argv[0], self.argv) os.execvp(self.argv[0], self.argv)
assert(0) assert(0)
# returns only if there's an exception # returns only if there's an exception
def _after(self, t, rv): def _after(self, t, rv):
try:
self._after1(t, rv)
finally:
self._after2(rv)
def _after1(self, t, rv):
f = self.f f = self.f
tmpname = self.tmpname tmpname = self.tmpname
if rv==0: if rv==0:
@ -123,11 +121,12 @@ class BuildJob:
else: else:
if vars.VERBOSE or vars.XTRACE: if vars.VERBOSE or vars.XTRACE:
log('%s (done)\n\n' % _nice(t)) log('%s (done)\n\n' % _nice(t))
return self._after2(rv)
def _after2(self, rv): def _after2(self, rv):
try:
self.donefunc(self.t, rv) self.donefunc(self.t, rv)
assert(self.lock.owned) assert(self.lock.owned)
finally:
self.lock.unlock() self.lock.unlock()

View file

@ -1,4 +1,4 @@
import sys, os, errno import sys, os, errno, fcntl
import vars import vars
@ -69,3 +69,11 @@ def debug2(s):
log_('redo: %s%s' % (vars.DEPTH, s)) log_('redo: %s%s' % (vars.DEPTH, s))
def close_on_exec(fd, yes):
fl = fcntl.fcntl(fd, fcntl.F_GETFD)
fl &= ~fcntl.FD_CLOEXEC
if yes:
fl |= fcntl.FD_CLOEXEC
fcntl.fcntl(fd, fcntl.F_SETFD, fl)

View file

@ -1,6 +1,6 @@
import sys, os, errno, glob import sys, os, errno, glob
import vars import vars
from helpers import unlink, debug2, mkdirp from helpers import unlink, debug2, mkdirp, close_on_exec
def init(): def init():
@ -145,8 +145,8 @@ def start(t):
class Lock: class Lock:
def __init__(self, t): def __init__(self, t):
self.owned = False self.owned = False
self.rfd = self.wfd = None
self.lockname = _sname('lock', t) self.lockname = _sname('lock', t)
self.tmpname = _sname('lock%d' % os.getpid(), t)
def __del__(self): def __del__(self):
if self.owned: if self.owned:
@ -156,6 +156,10 @@ class Lock:
try: try:
os.mkfifo(self.lockname, 0600) os.mkfifo(self.lockname, 0600)
self.owned = True self.owned = True
self.rfd = os.open(self.lockname, os.O_RDONLY|os.O_NONBLOCK)
self.wfd = os.open(self.lockname, os.O_WRONLY)
close_on_exec(self.rfd, True)
close_on_exec(self.wfd, True)
except OSError, e: except OSError, e:
if e.errno == errno.EEXIST: if e.errno == errno.EEXIST:
pass pass
@ -172,22 +176,11 @@ class Lock:
if not self.owned: if not self.owned:
raise Exception("can't unlock %r - we don't own it" raise Exception("can't unlock %r - we don't own it"
% self.lockname) % self.lockname)
# make sure no readers can connect unlink(self.lockname)
try:
os.rename(self.lockname, self.tmpname)
except OSError, e:
if e.errno == errno.ENOENT: # 'make clean' might do this
self.owned = False
return
try:
# ping any connected readers # ping any connected readers
os.close(os.open(self.tmpname, os.O_WRONLY|os.O_NONBLOCK)) os.close(self.rfd)
except OSError, e: os.close(self.wfd)
if e.errno == errno.ENXIO: # no readers open; that's ok self.rfd = self.wfd = None
pass
else:
raise
os.unlink(self.tmpname)
self.owned = False self.owned = False
def wait(self): def wait(self):
@ -195,7 +188,11 @@ class Lock:
raise Exception("can't wait on %r - we own it" % self.lockname) raise Exception("can't wait on %r - we own it" % self.lockname)
try: try:
# open() will finish only when a writer exists and does close() # open() will finish only when a writer exists and does close()
os.close(os.open(self.lockname, os.O_RDONLY)) fd = os.open(self.lockname, os.O_RDONLY)
try:
os.read(fd, 1)
finally:
os.close(fd)
except OSError, e: except OSError, e:
if e.errno == errno.ENOENT: if e.errno == errno.ENOENT:
pass # it's not even unlocked or was unlocked earlier pass # it's not even unlocked or was unlocked earlier