2010-11-12 20:08:38 -08:00
|
|
|
#
|
|
|
|
|
# beware the jobberwack
|
|
|
|
|
#
|
2010-12-10 04:55:13 -08:00
|
|
|
import sys, os, errno, select, fcntl, signal
|
2010-12-11 18:32:40 -08:00
|
|
|
from helpers import atoi, close_on_exec
|
2018-10-06 04:36:24 -04:00
|
|
|
import state
|
2010-11-12 20:08:38 -08:00
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
# Job count requested when this process created the jobserver pipe itself
# (see setup()); stays 0 when a pipe was inherited or none exists yet.
_toplevel = 0

# Number of tokens this process currently holds; we start with our own.
_mytokens = 1

# (read_fd, write_fd) of the jobserver token pipe, or None before setup().
_fds = None

# Maps the read end of each running job's completion pipe to its Job.
_waitfds = dict()
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _debug(s):
    """Write trace message `s` to stderr, prefixed with 'jwack#<pid>: '.

    Tracing is switched off by default; set the local flag below to 1 to
    enable it while debugging token traffic.
    """
    enabled = 0
    if not enabled:
        return
    sys.stderr.write('jwack#%d: %s' % (os.getpid(), s))
|
|
|
|
|
|
|
|
|
|
|
2010-11-12 21:09:29 -08:00
|
|
|
def _release(n):
    """Credit `n` tokens to this process and hand back any surplus.

    We only ever keep a single token for ourselves: after adding `n`,
    everything above one is written to the jobserver pipe (one byte per
    token) so other processes can pick it up.
    """
    global _mytokens
    _mytokens += n
    _debug('release(%d) -> %d\n' % (n, _mytokens))
    if _mytokens > 1:
        # A bytes literal is the same as 't' on Python 2 and is the only
        # thing os.write() accepts on Python 3.
        os.write(_fds[1], b't' * (_mytokens - 1))
        _mytokens = 1
|
|
|
|
|
|
|
|
|
|
|
2010-12-10 23:04:46 -08:00
|
|
|
def release_mine():
    """Give up one token held by this process by writing it to the pipe.

    Asserts that we actually hold at least one token.
    """
    global _mytokens
    assert(_mytokens >= 1)
    _mytokens -= 1
    _debug('release_mine() -> %d\n' % _mytokens)
    # bytes literal: identical to 't' on Python 2, required on Python 3.
    os.write(_fds[1], b't')
|
2010-12-10 23:04:46 -08:00
|
|
|
|
|
|
|
|
|
2010-12-10 04:55:13 -08:00
|
|
|
def _timeout(sig, frame):
    """No-op signal handler.

    Installed for SIGALRM by _try_read(); it does nothing itself, it only
    needs to exist so the alarm is delivered to a handler.
    """
    return None
|
|
|
|
|
|
|
|
|
|
|
redo-log: capture and linearize the output of redo builds.
redo now saves the stderr from every .do script, for every target, into
a file in the .redo directory. That means you can look up the logs
from the most recent build of any target using the new redo-log
command, for example:
redo-log -r all
The default is to show logs non-recursively, that is, it'll show when a
target does redo-ifchange on another target, but it won't recurse into
the logs for the latter target. With -r (recursive), it does. With -u
(unchanged), it does even if redo-ifchange discovered that the target
was already up-to-date; in that case, it prints the logs of the *most
recent* time the target was generated.
With --no-details, redo-log will show only the 'redo' lines, not the
other log messages. For very noisy build systems (like recursing into
a 'make' instance) this can be helpful to get an overview of what
happened, without all the cruft.
You can use the -f (follow) option like tail -f, to follow a build
that's currently in progress until it finishes. redo itself spins up a
copy of redo-log -r -f while it runs, so you can see what's going on.
Still broken in this version:
- No man page or new tests yet.
- ANSI colors don't yet work (unless you use --raw-logs, which gives
the old-style behaviour).
- You can't redirect the output of a sub-redo to a file or a
pipe right now, because redo-log is eating it.
- The regex for matching 'redo' lines in the log is very gross.
Instead, we should put the raw log files in a more machine-parseable
format, and redo-log should turn that into human-readable format.
- redo-log tries to "linearize" the logs, which makes them
comprehensible even for a large parallel build. It recursively shows
log messages for each target in depth-first tree order (by tracing
into a new target every time it sees a 'redo' line). This works
really well, but in some specific cases, the "topmost" redo instance
can get stuck waiting for a jwack token, which makes it look like the
whole build has stalled, when really redo-log is just waiting a long
time for a particular subprocess to be able to continue. We'll need to
add a specific workaround for that.
2018-11-03 22:09:18 -04:00
|
|
|
# We make the pipes use the first available fd numbers starting at startfd.
|
|
|
|
|
# This makes it easier to differentiate different kinds of pipes when using
|
|
|
|
|
# strace.
|
2010-12-11 18:24:10 -08:00
|
|
|
def _make_pipe(startfd):
    """Create a pipe whose fds are relocated to numbers >= `startfd`.

    Returns a (read_fd, write_fd) tuple.  The read end is duplicated to the
    first free descriptor >= startfd and the write end to the first free
    descriptor >= startfd+1.  The descriptors originally returned by
    os.pipe() are always closed, even if duplication fails, so nothing
    leaks on error.
    """
    (a, b) = os.pipe()
    try:
        rfd = fcntl.fcntl(a, fcntl.F_DUPFD, startfd)
        try:
            wfd = fcntl.fcntl(b, fcntl.F_DUPFD, startfd + 1)
        except Exception:
            # don't leave the already-duplicated read end behind
            os.close(rfd)
            raise
    finally:
        os.close(a)
        os.close(b)
    return (rfd, wfd)
|
|
|
|
|
|
|
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
def _try_read(fd, n):
    """Try to read up to `n` bytes from blocking descriptor `fd` without
    getting stuck.

    Returns:
        ''    if nothing could be read right now (caller should try again),
        None  on EOF,
        otherwise the data that was read.

    Raises OSError for any read failure other than EAGAIN/EINTR.

    The read now honours the `fd` and `n` arguments; previously it always
    read one byte from _fds[0] regardless of what was passed in.
    """
    # using djb's suggested way of doing non-blocking reads from a blocking
    # socket: http://cr.yp.to/unix/nonblock.html
    # We can't just make the socket non-blocking, because we want to be
    # compatible with GNU Make, and they can't handle it.
    readable = select.select([fd], [], [], 0)[0]
    if not readable:
        return ''  # try again
    # ok, the socket is readable - but some other process might get there
    # first.  We have to set an alarm() in case our read() gets stuck.
    assert(state.is_flushed())
    oldh = signal.signal(signal.SIGALRM, _timeout)
    try:
        signal.setitimer(signal.ITIMER_REAL, 0.01, 0.01)  # emergency fallback
        try:
            b = os.read(fd, n)
        except OSError as e:
            if e.errno in (errno.EAGAIN, errno.EINTR):
                # interrupted or it was nonblocking
                return ''  # try again
            raise
    finally:
        # always disarm the timer and put the previous handler back
        signal.setitimer(signal.ITIMER_REAL, 0, 0)
        signal.signal(signal.SIGALRM, oldh)
    return b or None  # None means EOF
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup(maxjobs):
    """Attach to an inherited jobserver, or start a new one.

    Does nothing if the token pipe is already set up.  Otherwise MAKEFLAGS
    is searched for --jobserver-auth=R,W (or the older --jobserver-fds=R,W);
    if found, those descriptors are validated and adopted.  If none was
    found and `maxjobs` is non-zero, a new token pipe is created, seeded
    with maxjobs-1 tokens, and advertised through MAKEFLAGS using both
    option spellings.

    Raises ValueError if the advertised descriptors are malformed,
    non-positive, or not open in this process.
    """
    global _fds, _toplevel
    if _fds:
        return  # already set up
    _debug('setup(%d)\n' % maxjobs)
    flags = ' ' + os.getenv('MAKEFLAGS', '') + ' '
    FIND1 = ' --jobserver-auth='  # renamed in GNU make 4.2
    FIND2 = ' --jobserver-fds='  # fallback syntax
    FIND = FIND1
    ofs = flags.find(FIND1)
    if ofs < 0:
        FIND = FIND2
        ofs = flags.find(FIND2)
    if ofs >= 0:
        s = flags[ofs+len(FIND):]
        # flags always ends with a space, so there is always a first word
        arg = s.split(' ', 1)[0]
        if ',' not in arg:
            # previously this surfaced as an opaque tuple-unpacking error
            raise ValueError('invalid --jobserver-auth: %r' % arg)
        (a, b) = arg.split(',', 1)
        # NOTE(review): atoi comes from helpers; presumably it yields 0 for
        # non-numeric text, which the check below then rejects - confirm.
        a = atoi(a)
        b = atoi(b)
        if a <= 0 or b <= 0:
            raise ValueError('invalid --jobserver-auth: %r' % arg)
        try:
            # probe both descriptors to make sure they are really open
            fcntl.fcntl(a, fcntl.F_GETFL)
            fcntl.fcntl(b, fcntl.F_GETFL)
        except IOError as e:
            if e.errno == errno.EBADF:
                raise ValueError('broken --jobserver-auth from make; prefix your Makefile rule with a "+"')
            else:
                raise
        _fds = (a, b)
    if maxjobs and not _fds:
        # need to start a new server
        _toplevel = maxjobs
        _fds = _make_pipe(100)
        _release(maxjobs-1)
        os.putenv('MAKEFLAGS',
                  '%s -j --jobserver-auth=%d,%d --jobserver-fds=%d,%d' %
                  (os.getenv('MAKEFLAGS', ''),
                   _fds[0], _fds[1],
                   _fds[0], _fds[1]))
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def wait(want_token):
    """Block until a running job finishes or, if `want_token` is true, the
    token pipe becomes readable.

    For every job whose completion pipe became readable: its token is
    released, the pipe is closed, the child is reaped, its exit status is
    stored in job.rv (negative signal number if it did not exit normally),
    and its donefunc(name, rv) callback is invoked.  Readability of the
    token pipe itself is not acted upon here.
    """
    rfds = list(_waitfds)
    if want_token and _fds:
        rfds.append(_fds[0])
    assert(rfds)
    assert(state.is_flushed())
    readable = select.select(rfds, [], [])[0]
    _debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, readable))
    for fd in readable:
        if _fds and fd == _fds[0]:
            # token pipe: nothing to do in this function
            continue
        job = _waitfds[fd]
        _debug("done: %r\n" % job.name)
        _release(1)
        os.close(fd)
        del _waitfds[fd]
        reaped = os.waitpid(job.pid, 0)
        assert(reaped[0] == job.pid)
        _debug("done1: rv=%r\n" % (reaped,))
        status = reaped[1]
        if os.WIFEXITED(status):
            job.rv = os.WEXITSTATUS(status)
        else:
            job.rv = -os.WTERMSIG(status)
        _debug("done2: rv=%d\n" % job.rv)
        job.donefunc(job.name, job.rv)
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
|
2010-12-09 05:53:30 -08:00
|
|
|
def has_token():
    """Return True if this process currently holds at least one token,
    False otherwise.

    (Previously the "no token" case fell off the end and returned None;
    it is still falsy, just an explicit bool now.)
    """
    return _mytokens >= 1
|
|
|
|
|
|
|
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
def get_token(reason):
    """Block until this process holds exactly one token.

    If we already hold our token it is simply reused.  Otherwise we wait
    for either a child to finish (which returns its token to us) or for
    the jobserver pipe to become readable, and then try to read a token
    from it.  `reason` is only used in debug messages.

    Raises Exception if the token pipe reports EOF.
    """
    global _mytokens
    assert(state.is_flushed())
    assert(_mytokens <= 1)
    setup(1)
    while True:
        if _mytokens >= 1:
            _debug("_mytokens is %d\n" % _mytokens)
            assert(_mytokens == 1)
            _debug('(%r) used my own token...\n' % reason)
            break
        assert(_mytokens < 1)
        _debug('(%r) waiting for tokens...\n' % reason)
        wait(want_token=1)
        if _mytokens >= 1:
            # a finished child handed its token back to us
            break
        assert(_mytokens < 1)
        if not _fds:
            continue
        tok = _try_read(_fds[0], 1)
        if tok is None:
            raise Exception('unexpected EOF on token read')
        if tok:
            _mytokens += 1
            _debug('(%r) got a token (%r).\n' % (reason, tok))
            break
    assert(_mytokens <= 1)
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
2010-11-19 06:04:45 -08:00
|
|
|
def running():
    """Return how many started jobs have not yet been collected by wait()."""
    pending = len(_waitfds)
    return pending
|
|
|
|
|
|
|
|
|
|
|
2010-11-12 20:08:38 -08:00
|
|
|
def wait_all():
    """Wait for every running job to finish, then reclaim our own token.

    While jobs are outstanding, any token we hold is given back to the
    pipe so others can use it.  Once all jobs are done we take one token
    back.  If this process created the jobserver (_toplevel is set), all
    remaining tokens are drained and counted to verify that none were lost
    or duplicated, then written back.

    Raises Exception if the number of tokens found differs from
    _toplevel-1.
    """
    _debug("wait_all\n")
    assert(state.is_flushed())
    while running():
        while _mytokens >= 1:
            release_mine()
        _debug("wait_all: wait()\n")
        wait(want_token=0)
    _debug("wait_all: empty list\n")
    get_token('self')  # get my token back
    if _toplevel:
        chunks = []
        while 1:
            b = _try_read(_fds[0], 8192)
            # Check before accumulating: _try_read() returns None on EOF,
            # and concatenating that used to raise TypeError.
            if not b:
                break
            chunks.append(b)
        bb = b''.join(chunks)
        if len(bb) != _toplevel-1:
            raise Exception('on exit: expected %d tokens; found %r'
                            % (_toplevel-1, len(bb)))
        os.write(_fds[1], bb)
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def force_return_tokens():
    """Forget all outstanding jobs and return their tokens to the pipe.

    Every entry in the job table is dropped (the children are not waited
    for here) and, if a token pipe exists, one token per dropped job is
    released.
    """
    n = len(_waitfds)
    if n:
        _debug('%d tokens left in force_return_tokens\n' % n)
    _debug('returning %d tokens\n' % n)
    # clear() empties the shared dict in place; deleting keys while looping
    # over .keys() only works when keys() returns a copy (Python 2).
    _waitfds.clear()
    if _fds:
        _release(n)
    assert(state.is_flushed())
|
2010-11-12 20:08:38 -08:00
|
|
|
|
|
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
def _pre_job(r, w, pfn):
    """Close descriptor `r`, then call `pfn()` if one was supplied.

    `w` is accepted but not used by this function.
    """
    os.close(r)
    if not pfn:
        return
    pfn()
|
2010-11-12 20:08:38 -08:00
|
|
|
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
class Job:
    """Bookkeeping record for one started child process."""

    def __init__(self, name, pid, donefunc):
        # rv is filled in with the exit status once the child is reaped
        self.rv = None
        self.donefunc = donefunc
        self.pid = pid
        self.name = name

    def __repr__(self):
        fields = (self.name, self.pid)
        return 'Job(%s,%d)' % fields
|
2010-11-13 04:36:44 -08:00
|
|
|
|
|
|
|
|
|
2010-11-22 00:03:43 -08:00
|
|
|
def start_job(reason, jobfunc, donefunc):
    """Fork a child that runs `jobfunc`, spending one token on it.

    Blocks until a token is available, then forks.  The child runs
    jobfunc() and exits with its return value (0 if falsy, 201 if it
    raised).  The parent records a Job keyed by the read end of a pipe
    whose write end is held by the child; wait() uses that pipe to notice
    the child finishing and then calls donefunc(reason, rv).

    If fork() itself fails, the pipe is closed and the token is kept by
    this process before the OSError propagates, so nothing is leaked.
    """
    global _mytokens
    assert(state.is_flushed())
    assert(_mytokens <= 1)
    get_token(reason)
    assert(_mytokens >= 1)
    assert(_mytokens == 1)
    _mytokens -= 1
    r, w = _make_pipe(50)
    try:
        pid = os.fork()
    except OSError:
        # no child was created: undo the pipe and take the token back
        os.close(r)
        os.close(w)
        _mytokens += 1
        raise
    if pid == 0:
        # child
        os.close(r)
        rv = 201
        try:
            try:
                rv = jobfunc() or 0
                _debug('jobfunc completed (%r, %r)\n' % (jobfunc, rv))
            except Exception:
                import traceback
                traceback.print_exc()
        finally:
            # never return into the parent's code path from the child
            _debug('exit: %d\n' % rv)
            os._exit(rv)
    close_on_exec(r, True)
    os.close(w)
    pd = Job(reason, pid, donefunc)
    _waitfds[r] = pd
|