Cyclic dependency checker: don't give up token in common case.

The way the code was written, we'd give up our token, detect a cyclic
dependency, and then try to get our token back before exiting.  Even
with -j1, the temporary token release allowed any parent up the tree to
continue running jobs, so it would take an arbitrary amount of time
before we could exit (and report an error code to the parent).

There was no visible symptom of this except that, with -j1, t/355-deps-cyclic
would not finish until some of the later tests finished, which was
surprising.

To fix it, let's just check for a cyclic dependency first, then release
the token only once we're sure things are sane.
This commit is contained in:
Avery Pennarun 2018-11-13 06:54:31 -05:00
commit bb80118298
3 changed files with 19 additions and 15 deletions

View file

@ -353,18 +353,18 @@ def main(targets, shouldbuildfunc):
backoff *= 2 backoff *= 2
if vars.DEBUG_LOCKS: if vars.DEBUG_LOCKS:
warn('%s (WAITING)\n' % _nice(t)) warn('%s (WAITING)\n' % _nice(t))
try:
lock.check()
except state.CyclicDependencyError:
err('cyclic dependency while building %s\n' % _nice(t))
retcode[0] = 208
return retcode[0]
# this sequence looks a little silly, but the idea is to # this sequence looks a little silly, but the idea is to
# give up our personal token while we wait for the lock to # give up our personal token while we wait for the lock to
# be released; but we should never run get_token() while # be released; but we should never run get_token() while
# holding a lock, or we could cause deadlocks. # holding a lock, or we could cause deadlocks.
jwack.release_mine() jwack.release_mine()
try:
lock.waitlock() lock.waitlock()
except state.CyclicDependencyError:
err('cyclic dependency while building %s\n' % _nice(t))
jwack.get_token(t)
retcode[0] = 208
return retcode[0]
lock.unlock() lock.unlock()
jwack.get_token(t) jwack.get_token(t)
lock.trylock() lock.trylock()

View file

@ -18,8 +18,8 @@ def _debug(s):
def _release(n): def _release(n):
global _mytokens global _mytokens
_debug('release(%d)\n' % n)
_mytokens += n _mytokens += n
_debug('release(%d) -> %d\n' % (n, _mytokens))
if _mytokens > 1: if _mytokens > 1:
os.write(_fds[1], 't' * (_mytokens-1)) os.write(_fds[1], 't' * (_mytokens-1))
_mytokens = 1 _mytokens = 1
@ -28,8 +28,9 @@ def _release(n):
def release_mine(): def release_mine():
global _mytokens global _mytokens
assert(_mytokens >= 1) assert(_mytokens >= 1)
os.write(_fds[1], 't')
_mytokens -= 1 _mytokens -= 1
_debug('release_mine() -> %d\n' % _mytokens)
os.write(_fds[1], 't')
def _timeout(sig, frame): def _timeout(sig, frame):
@ -198,7 +199,7 @@ def wait_all():
bb += b bb += b
if not b: break if not b: break
if len(bb) != _toplevel-1: if len(bb) != _toplevel-1:
raise Exception('on exit: expected %d tokens; found only %r' raise Exception('on exit: expected %d tokens; found %r'
% (_toplevel-1, len(bb))) % (_toplevel-1, len(bb)))
os.write(_fds[1], bb) os.write(_fds[1], bb)

View file

@ -348,8 +348,14 @@ class Lock:
if self.lockfile is not None: if self.lockfile is not None:
os.close(self.lockfile) os.close(self.lockfile)
def trylock(self): def check(self):
assert(not self.owned) assert(not self.owned)
if str(self.fid) in vars.get_locks():
# Lock already held by parent: cyclic dependence
raise CyclicDependencyError()
def trylock(self):
self.check()
try: try:
fcntl.lockf(self.lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 0, 0) fcntl.lockf(self.lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 0, 0)
except IOError, e: except IOError, e:
@ -361,10 +367,7 @@ class Lock:
self.owned = True self.owned = True
def waitlock(self): def waitlock(self):
assert(not self.owned) self.check()
if str(self.fid) in vars.get_locks():
# Lock already held by parent: cyclic dependence
raise CyclicDependencyError()
fcntl.lockf(self.lockfile, fcntl.LOCK_EX, 0, 0) fcntl.lockf(self.lockfile, fcntl.LOCK_EX, 0, 0)
self.owned = True self.owned = True