From 49ebea445fd0ac82ec4e0c9e4c968b202807b02b Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 04:55:13 -0800 Subject: [PATCH] jwack: don't ever set the jobserver socket to O_NONBLOCK. It creates a race condition: GNU Make might try to read while the socket is O_NONBLOCK, get EAGAIN, and die; or else another redo might set it back to blocking in between our call to make it O_NONBLOCK and our call to read(). This method - setting an alarm() during the read - is hacky, but should work every time. Unfortunately you get a 1s delay - rarely - when this happens. The good news is it only happens when there are no tokens available anyhow, so it won't affect performance much in any situation I can imagine. --- jwack.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/jwack.py b/jwack.py index 2876094..73990d0 100644 --- a/jwack.py +++ b/jwack.py @@ -1,7 +1,7 @@ # # beware the jobberwack # -import sys, os, errno, select, fcntl +import sys, os, errno, select, fcntl, signal import atoi _toplevel = 0 @@ -24,22 +24,35 @@ def _release(n): _mytokens = 1 +def _timeout(sig, frame): + pass + + def _try_read(fd, n): - # FIXME: this isn't actually safe, because GNU make can't handle it if - # the socket is nonblocking. Ugh. That means we'll have to do their - # horrible SIGCHLD hack after all. - fcntl.fcntl(_fds[0], fcntl.F_SETFL, os.O_NONBLOCK) + # using djb's suggested way of doing non-blocking reads from a blocking + # socket: http://cr.yp.to/unix/nonblock.html + # We can't just make the socket non-blocking, because we want to be + # compatible with GNU Make, and they can't handle it. + r,w,x = select.select([fd], [], [], 0) + if not r: + return '' # try again + # ok, the socket is readable - but some other process might get there + # first. We have to set an alarm() in case our read() gets stuck. + oldh = signal.signal(signal.SIGALRM, _timeout) try: + signal.alarm(1) # emergency fallback try: b = os.read(_fds[0], 1) except OSError, e: - if e.errno == errno.EAGAIN: - return '' + if e.errno in (errno.EAGAIN, errno.EINTR): + # interrupted or it was nonblocking + return '' # try again else: raise finally: - fcntl.fcntl(_fds[0], fcntl.F_SETFL, 0) - return b and b or None + signal.alarm(0) + signal.signal(signal.SIGALRM, oldh) + return b and b or None # None means EOF def setup(maxjobs):