From 2dbd47100df5f122b8951f9e254a7a94c20b1bc2 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Mon, 22 Nov 2010 03:21:17 -0800 Subject: [PATCH] state.py: reduce race condition between Lock.trylock() and unlock(). If 'redo clean' deletes the lockfile after trylock() succeeds but before unlock(), then unlock() won't be able to open the pipe in order to release readers, and any waiters might end up waiting forever. We can't open the fifo for write until there's at least one reader, so let's open a reader *just* to let us open a writer. Then we'll leave them open until the later unlock(), which can just close them both. --- builder.py | 31 +++++++++++++++---------------- helpers.py | 10 +++++++++- state.py | 35 ++++++++++++++++------------------- 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/builder.py b/builder.py index 2085ec2..b22d873 100644 --- a/builder.py +++ b/builder.py @@ -1,6 +1,6 @@ -import sys, os, random, fcntl +import sys, os, random import vars, jwack, state -from helpers import log, log_, debug2, err, unlink +from helpers import log, log_, debug2, err, unlink, close_on_exec def _possible_do_files(t): @@ -31,14 +31,6 @@ def _nice(t): return os.path.normpath(os.path.join(vars.PWD, t)) -def _close_on_exec(fd, yes): - fl = fcntl.fcntl(fd, fcntl.F_GETFD) - fl &= ~fcntl.FD_CLOEXEC - if yes: - fl |= fcntl.FD_CLOEXEC - fcntl.fcntl(fd, fcntl.F_SETFD, fl) - - class BuildJob: def __init__(self, t, lock, shouldbuildfunc, donefunc): self.t = t @@ -71,7 +63,7 @@ class BuildJob: state.stamp(dofile) unlink(tmpname) ffd = os.open(tmpname, os.O_CREAT|os.O_RDWR|os.O_EXCL) - _close_on_exec(ffd, True) + close_on_exec(ffd, True) self.f = os.fdopen(ffd, 'w+') # this will run in the dofile's directory, so use only basenames here argv = ['sh', '-e', @@ -97,12 +89,18 @@ class BuildJob: os.chdir(dn) os.dup2(self.f.fileno(), 1) os.close(self.f.fileno()) - _close_on_exec(1, False) + close_on_exec(1, False) os.execvp(self.argv[0], self.argv) assert(0) # returns only if there's an exception def _after(self, t, rv): + try: + self._after1(t, rv) + finally: + self._after2(rv) + + def _after1(self, t, rv): f = self.f tmpname = self.tmpname if rv==0: @@ -123,12 +121,13 @@ class BuildJob: else: if vars.VERBOSE or vars.XTRACE: log('%s (done)\n\n' % _nice(t)) - return self._after2(rv) def _after2(self, rv): - self.donefunc(self.t, rv) - assert(self.lock.owned) - self.lock.unlock() + try: + self.donefunc(self.t, rv) + assert(self.lock.owned) + finally: + self.lock.unlock() def main(targets, shouldbuildfunc): diff --git a/helpers.py b/helpers.py index 17301da..ce3ad69 100644 --- a/helpers.py +++ b/helpers.py @@ -1,4 +1,4 @@ -import sys, os, errno +import sys, os, errno, fcntl import vars @@ -69,3 +69,11 @@ def debug2(s): log_('redo: %s%s' % (vars.DEPTH, s)) +def close_on_exec(fd, yes): + fl = fcntl.fcntl(fd, fcntl.F_GETFD) + fl &= ~fcntl.FD_CLOEXEC + if yes: + fl |= fcntl.FD_CLOEXEC + fcntl.fcntl(fd, fcntl.F_SETFD, fl) + + diff --git a/state.py b/state.py index 4e4da25..4a5ad18 100644 --- a/state.py +++ b/state.py @@ -1,6 +1,6 @@ import sys, os, errno, glob import vars -from helpers import unlink, debug2, mkdirp +from helpers import unlink, debug2, mkdirp, close_on_exec def init(): @@ -145,8 +145,8 @@ def start(t): class Lock: def __init__(self, t): self.owned = False + self.rfd = self.wfd = None self.lockname = _sname('lock', t) - self.tmpname = _sname('lock%d' % os.getpid(), t) def __del__(self): if self.owned: @@ -156,6 +156,10 @@ class Lock: try: os.mkfifo(self.lockname, 0600) self.owned = True + self.rfd = os.open(self.lockname, os.O_RDONLY|os.O_NONBLOCK) + self.wfd = os.open(self.lockname, os.O_WRONLY) + close_on_exec(self.rfd, True) + close_on_exec(self.wfd, True) except OSError, e: if e.errno == errno.EEXIST: pass @@ -172,22 +176,11 @@ class Lock: if not self.owned: raise Exception("can't unlock %r - we don't own it" % self.lockname) - # make sure no readers can connect - try: - os.rename(self.lockname, self.tmpname) - except OSError, e: - if e.errno == errno.ENOENT: # 'make clean' might do this - self.owned = False - return - try: - # ping any connected readers - os.close(os.open(self.tmpname, os.O_WRONLY|os.O_NONBLOCK)) - except OSError, e: - if e.errno == errno.ENXIO: # no readers open; that's ok - pass - else: - raise - os.unlink(self.tmpname) + unlink(self.lockname) + # ping any connected readers + os.close(self.rfd) + os.close(self.wfd) + self.rfd = self.wfd = None self.owned = False def wait(self): @@ -195,7 +188,11 @@ class Lock: raise Exception("can't wait on %r - we own it" % self.lockname) try: # open() will finish only when a writer exists and does close() - os.close(os.open(self.lockname, os.O_RDONLY)) + fd = os.open(self.lockname, os.O_RDONLY) + try: + os.read(fd, 1) + finally: + os.close(fd) except OSError, e: if e.errno == errno.ENOENT: pass # it's not even unlocked or was unlocked earlier