2010-11-12 20:08:38 -08:00
|
|
|
#
|
|
|
|
|
# beware the jobberwack
|
|
|
|
|
#
|
2010-12-10 04:55:13 -08:00
|
|
|
import sys, os, errno, select, fcntl, signal
|
2010-12-11 18:32:40 -08:00
|
|
|
from helpers import atoi, close_on_exec
|
2018-10-06 04:36:24 -04:00
|
|
|
import state
|
2010-11-12 20:08:38 -08:00
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
# Module-wide jobserver state.

# Set by setup() to maxjobs when this process creates the jobserver pipe
# itself; stays 0 when the jobserver was inherited through MAKEFLAGS.
_toplevel = 0
|
|
|
|
|
# Number of tokens this process currently holds; it starts out with one.
_mytokens = 1
|
2010-11-12 20:08:38 -08:00
|
|
|
# (read_fd, write_fd) of the jobserver token pipe; None until setup() runs.
_fds = None
|
|
|
|
|
# Maps the read end of each started job's pipe to its Job object.
_waitfds = {}
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _debug(s):
    """Write a pid-tagged trace message to stderr when tracing is enabled.

    Tracing is switched off by the constant below, so by default this
    function does nothing and returns None.
    """
    tracing = 0
    if not tracing:
        return
    sys.stderr.write('jwack#%d: %s' % (os.getpid(), s))
|
|
|
|
|
|
|
|
|
|
|
2010-11-12 21:09:29 -08:00
|
|
|
def _release(n):
    """Credit n tokens to this process and hand any surplus back.

    At most one token is kept locally.  Everything above that is written
    to the write end of the jobserver pipe, one 't' byte per token.
    """
    global _mytokens
    _mytokens += n
    _debug('release(%d) -> %d\n' % (n, _mytokens))
    surplus = _mytokens - 1
    if surplus >= 1:
        os.write(_fds[1], 't' * surplus)
        # only lower the local count once the tokens are in the pipe
        _mytokens = 1
|
|
|
|
|
|
|
|
|
|
|
2010-12-10 23:04:46 -08:00
|
|
|
def release_mine():
    """Give up one of this process's own tokens.

    The local token count is decremented and a single 't' byte is written
    to the write end of the jobserver pipe.  The caller must currently
    hold at least one token.
    """
    global _mytokens
    assert _mytokens >= 1
    _mytokens = _mytokens - 1
    _debug('release_mine() -> %d\n' % _mytokens)
    os.write(_fds[1], 't')
|
2010-12-10 23:04:46 -08:00
|
|
|
|
|
|
|
|
|
2010-12-10 04:55:13 -08:00
|
|
|
def _timeout(sig, frame):
    """Signal handler that deliberately does nothing.

    _try_read() installs it for SIGALRM around its read() call; the
    handler only needs to exist, it has no work of its own.
    """
    return None
|
|
|
|
|
|
|
|
|
|
|
2010-12-11 18:24:10 -08:00
|
|
|
def _make_pipe(startfd):
    """Create a pipe whose descriptors are numbered at or above startfd.

    The pipe returned by os.pipe() is duplicated with F_DUPFD so that the
    read end is >= startfd and the write end is >= startfd+1; the original
    low-numbered descriptors are then closed.

    Returns:
        A (read_fd, write_fd) tuple.

    Raises:
        Whatever os.pipe() or fcntl.fcntl() raise.  No descriptor created
        by this function is left open in that case.
    """
    (a, b) = os.pipe()
    try:
        rfd = fcntl.fcntl(a, fcntl.F_DUPFD, startfd)
        try:
            wfd = fcntl.fcntl(b, fcntl.F_DUPFD, startfd + 1)
        except Exception:
            # don't leak the first duplicate if the second one fails
            os.close(rfd)
            raise
    finally:
        # the originals are never needed once duplication was attempted
        os.close(a)
        os.close(b)
    return (rfd, wfd)
|
|
|
|
|
|
|
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
def _try_read(fd, n):
    """Try to read up to n bytes from fd without blocking for long.

    Args:
        fd: file descriptor to read from.
        n: maximum number of bytes to read.

    Returns:
        '' if nothing could be read right now (the caller should retry),
        None on end-of-file, otherwise the data that was read.

    Raises:
        OSError: for any read error other than EAGAIN or EINTR.
    """
    # using djb's suggested way of doing non-blocking reads from a blocking
    # socket: http://cr.yp.to/unix/nonblock.html
    # We can't just make the socket non-blocking, because we want to be
    # compatible with GNU Make, and they can't handle it.
    readable, _, _ = select.select([fd], [], [], 0)
    if not readable:
        return ''  # try again
    # ok, the socket is readable - but some other process might get there
    # first.  We have to set an alarm() in case our read() gets stuck.
    assert state.is_flushed()
    oldh = signal.signal(signal.SIGALRM, _timeout)
    try:
        signal.setitimer(signal.ITIMER_REAL, 0.01, 0.01)  # emergency fallback
        try:
            b = os.read(fd, n)
        except OSError as e:
            if e.errno in (errno.EAGAIN, errno.EINTR):
                # interrupted or it was nonblocking
                return ''  # try again
            raise
    finally:
        # always cancel the timer and restore the previous handler
        signal.setitimer(signal.ITIMER_REAL, 0, 0)
        signal.signal(signal.SIGALRM, oldh)
    return b or None  # None means EOF
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup(maxjobs):
    """Attach to an inherited jobserver, or start a new one.

    If MAKEFLAGS carries --jobserver-auth=R,W (or the older
    --jobserver-fds=R,W), those two descriptors become the token pipe.
    Otherwise, when maxjobs is nonzero, a new pipe is created, maxjobs-1
    tokens are put into it, and MAKEFLAGS is exported so that child
    processes can find it.  Calling this again once the pipe is known is
    a no-op.

    Args:
        maxjobs: number of parallel jobs to allow if this process has to
            create the jobserver; 0 means never create one.

    Raises:
        ValueError: the jobserver option is malformed, names non-positive
            descriptors, or names descriptors that are not open.
        IOError: any other failure while probing the descriptors.
    """
    global _fds, _toplevel
    if _fds:
        return  # already set up
    _debug('setup(%d)\n' % maxjobs)
    # pad with spaces so an option is always delimited on both sides
    flags = ' ' + os.getenv('MAKEFLAGS', '') + ' '
    FIND1 = ' --jobserver-auth='  # renamed in GNU make 4.2
    FIND2 = ' --jobserver-fds='   # fallback syntax
    FIND = FIND1
    ofs = flags.find(FIND1)
    if ofs < 0:
        FIND = FIND2
        ofs = flags.find(FIND2)
    if ofs >= 0:
        # flags ends with a space, so the value always has a terminator
        arg = flags[ofs + len(FIND):].split(' ', 1)[0]
        parts = arg.split(',', 1)
        if len(parts) != 2:
            # no "R,W" pair at all; report it like any other bad value
            raise ValueError('invalid --jobserver-auth: %r' % arg)
        a = atoi(parts[0])
        b = atoi(parts[1])
        if a <= 0 or b <= 0:
            raise ValueError('invalid --jobserver-auth: %r' % arg)
        try:
            # F_GETFL is only used to check that the descriptors are open
            fcntl.fcntl(a, fcntl.F_GETFL)
            fcntl.fcntl(b, fcntl.F_GETFL)
        except IOError as e:
            if e.errno == errno.EBADF:
                raise ValueError('broken --jobserver-auth from make; prefix your Makefile rule with a "+"')
            raise
        _fds = (a, b)
    if maxjobs and not _fds:
        # need to start a new server
        _toplevel = maxjobs
        _fds = _make_pipe(100)
        _release(maxjobs - 1)
        # advertise the pipe under both the new and the old option name
        os.putenv('MAKEFLAGS',
                  '%s -j --jobserver-auth=%d,%d --jobserver-fds=%d,%d' %
                  (os.getenv('MAKEFLAGS', ''),
                   _fds[0], _fds[1],
                   _fds[0], _fds[1]))
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def wait(want_token):
    """Block until a running job finishes or a token may be available.

    Selects on the pipe of every running job and, when want_token is
    true and the jobserver is set up, on the token pipe as well.  For
    each job whose pipe became readable: one token is released, the pipe
    is closed and forgotten, the child is reaped, its result is stored in
    the Job's rv (exit status, or minus the signal number) and its
    donefunc is called with (name, rv).

    Readability of the token pipe is only a wake-up; the token itself is
    not read here.
    """
    watch = list(_waitfds)
    if want_token and _fds:
        watch.append(_fds[0])
    assert watch
    assert state.is_flushed()
    ready, _, _ = select.select(watch, [], [])
    _debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, ready))
    for fd in ready:
        if _fds and fd == _fds[0]:
            # token pipe: nothing to do here, the caller reads it
            continue
        job = _waitfds[fd]
        _debug("done: %r\n" % job.name)
        _release(1)
        os.close(fd)
        del _waitfds[fd]
        reaped = os.waitpid(job.pid, 0)
        assert reaped[0] == job.pid
        _debug("done1: rv=%r\n" % (reaped,))
        status = reaped[1]
        if os.WIFEXITED(status):
            job.rv = os.WEXITSTATUS(status)
        else:
            job.rv = -os.WTERMSIG(status)
        _debug("done2: rv=%d\n" % job.rv)
        job.donefunc(job.name, job.rv)
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
|
2010-12-09 05:53:30 -08:00
|
|
|
def has_token():
    """Return True if this process currently holds at least one token,
    False otherwise."""
    return _mytokens >= 1
|
|
|
|
|
|
|
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
def get_token(reason):
    """Block until this process holds exactly one token.

    If a token is already held it is simply reused.  Otherwise this waits
    for either one of our own jobs to finish (wait() then credits its
    token to us) or for the jobserver pipe to become readable, in which
    case a single token is read from it.

    Args:
        reason: label used only in debug messages.

    Raises:
        Exception: the jobserver pipe reported end-of-file.
    """
    global _mytokens
    assert state.is_flushed()
    assert _mytokens <= 1
    setup(1)
    while True:
        if _mytokens >= 1:
            _debug("_mytokens is %d\n" % _mytokens)
            assert _mytokens == 1
            _debug('(%r) used my own token...\n' % reason)
            break
        assert _mytokens < 1
        _debug('(%r) waiting for tokens...\n' % reason)
        wait(want_token=1)
        if _mytokens >= 1:
            # a finished job handed its token back to us
            break
        assert _mytokens < 1
        if not _fds:
            continue
        tok = _try_read(_fds[0], 1)
        if tok is None:
            raise Exception('unexpected EOF on token read')
        if tok:
            _mytokens += 1
            _debug('(%r) got a token (%r).\n' % (reason, tok))
            break
        # empty read: somebody else got there first, go around again
    assert _mytokens <= 1
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
2010-11-19 06:04:45 -08:00
|
|
|
def running():
    """Return how many started jobs have not been collected by wait() yet."""
    outstanding = len(_waitfds)
    return outstanding
|
|
|
|
|
|
|
|
|
|
|
2010-11-12 20:08:38 -08:00
|
|
|
def wait_all():
    """Wait for every running job to finish, then reclaim our own token.

    While jobs are still running, any token held by this process is
    released so others can use it.  Afterwards one token is taken back.
    If this process created the jobserver (_toplevel is set), the token
    pipe is then drained, checked to contain exactly _toplevel-1 tokens,
    and refilled.

    Raises:
        Exception: on exit of the top-level process, the number of tokens
            found in the pipe is not _toplevel-1.
    """
    _debug("wait_all\n")
    assert state.is_flushed()
    while running():
        while _mytokens >= 1:
            release_mine()
        _debug("wait_all: wait()\n")
        wait(want_token=0)
    _debug("wait_all: empty list\n")
    get_token('self')  # get my token back
    if _toplevel:
        bb = ''
        while True:
            b = _try_read(_fds[0], 8192)
            if not b:
                # '' (nothing readable) or None (EOF): stop draining before
                # touching bb, so None never gets concatenated
                break
            bb += b
        if len(bb) != _toplevel - 1:
            raise Exception('on exit: expected %d tokens; found %r'
                            % (_toplevel - 1, len(bb)))
        # put the tokens back exactly as they were read
        os.write(_fds[1], bb)
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def force_return_tokens():
    """Forget every outstanding job and hand its token back.

    One token per entry in the job table is released (only when the
    jobserver is set up).  The table is emptied, but the jobs' pipe
    descriptors are not closed and the children are not reaped here.
    """
    n = len(_waitfds)
    if n:
        _debug('%d tokens left in force_return_tokens\n' % n)
    _debug('returning %d tokens\n' % n)
    # empty the table in place; other code holds a reference to this dict
    _waitfds.clear()
    if _fds:
        _release(n)
    assert state.is_flushed()
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
def _pre_job(r, w, pfn):
    """Close descriptor r, then run the optional hook pfn.

    Args:
        r: file descriptor to close.
        w: accepted but not used by this function.
        pfn: callable taking no arguments, or a false value to skip it.
    """
    os.close(r)
    if not pfn:
        return
    pfn()
|
2010-11-12 20:08:38 -08:00
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
class Job(object):
    """Bookkeeping record for one forked job."""

    def __init__(self, name, pid, donefunc):
        """Remember the job's label, child pid and completion callback.

        Args:
            name: label for the job; passed back to donefunc.
            pid: process id of the forked child.
            donefunc: callable invoked as donefunc(name, rv) by wait()
                once the child has been reaped.
        """
        self.name = name
        self.pid = pid
        # filled in by wait(): exit status, or minus the signal number
        self.rv = None
        self.donefunc = donefunc

    def __repr__(self):
        return 'Job(%s,%d)' % (self.name, self.pid)
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
|
2010-11-22 00:03:43 -08:00
|
|
|
def start_job(reason, jobfunc, donefunc):
    """Fork a child that runs jobfunc, spending one jobserver token on it.

    Blocks in get_token() until a token is available.  The child calls
    jobfunc() and exits with its return value (0 if that value is false,
    201 if jobfunc raised an Exception, after printing the traceback).
    The parent records the job under the read end of a fresh pipe; wait()
    watches that descriptor and later calls donefunc(reason, rv).

    Args:
        reason: label for the job; used in debug output and passed to
            donefunc as the job name.
        jobfunc: callable taking no arguments, run in the child.
        donefunc: callable(name, rv), run in the parent by wait().

    Raises:
        OSError: the pipe could not be created or fork() failed.  The
            token stays with this process and no descriptors are leaked.
    """
    global _mytokens
    assert state.is_flushed()
    assert _mytokens <= 1
    get_token(reason)
    assert _mytokens >= 1
    assert _mytokens == 1
    # Create the pipe before spending the token, so that a failure here
    # leaves the token accounting untouched.
    r, w = _make_pipe(50)
    # The child must start out holding no token, so decrement before fork.
    _mytokens -= 1
    try:
        pid = os.fork()
    except OSError:
        # no job was started: keep our token and drop the unused pipe
        _mytokens += 1
        os.close(r)
        os.close(w)
        raise
    if pid == 0:
        # child
        os.close(r)
        rv = 201
        try:
            try:
                rv = jobfunc() or 0
                _debug('jobfunc completed (%r, %r)\n' % (jobfunc, rv))
            except Exception:
                import traceback
                traceback.print_exc()
        finally:
            # never return into the parent's code path from the child
            _debug('exit: %d\n' % rv)
            os._exit(rv)
    # parent: presumably marks r close-on-exec - confirm in helpers
    close_on_exec(r, True)
    # only the child keeps the write end open from here on
    os.close(w)
    pd = Job(reason, pid, donefunc)
    _waitfds[r] = pd
|