From 613625b580fce54f276fde04e5f386ccb4d317ab Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Sat, 6 Oct 2018 04:36:24 -0400 Subject: [PATCH] Add more assertions about uncommitted sqlite transactions. I think we were sometimes leaving half-done sqlite transactions sitting around for a long time (eg. across sub-calls to .do files). This seemed to be okay on Linux, but caused sqlite deadlocks on MacOS. Most likely it's not the operating system, but the sqlite version and journal mode in use. In any case, the correct thing to do is to actually commit or rollback transactions, not leave them hanging around. ...unfortunately this doesn't actually fix my MacOS deadlocks, which makes me rather nervous. --- builder.py | 4 ++++ jwack.py | 7 +++++++ redo-ifchange.py | 6 +++++- redo.py | 7 ++++++- state.py | 13 +++++++++++++ 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/builder.py b/builder.py index 4c32583..523c8cf 100644 --- a/builder.py +++ b/builder.py @@ -203,6 +203,7 @@ class BuildJob: # redo-ifchange, and it might have done it from a different directory # than we started it in. So os.getcwd() might be != REDO_PWD right # now. + assert(state.is_flushed()) dn = self.dodir newp = os.path.realpath(dn) os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR) @@ -319,6 +320,7 @@ def main(targets, shouldbuildfunc): seen = {} lock = None for t in targets: + assert(state.is_flushed()) if t in seen: continue seen[t] = 1 @@ -343,6 +345,8 @@ def main(targets, shouldbuildfunc): locked.append((f.id,t)) else: BuildJob(t, f, lock, shouldbuildfunc, done).start() + state.commit() + assert(state.is_flushed()) del lock diff --git a/jwack.py b/jwack.py index 4717faf..1242e60 100644 --- a/jwack.py +++ b/jwack.py @@ -3,6 +3,7 @@ # import sys, os, errno, select, fcntl, signal from helpers import atoi, close_on_exec +import state _toplevel = 0 _mytokens = 1 @@ -54,6 +55,7 @@ def _try_read(fd, n): return '' # try again # ok, the socket is readable - but some other process might get there # first. We have to set an alarm() in case our read() gets stuck. + assert(state.is_flushed()) oldh = signal.signal(signal.SIGALRM, _timeout) try: signal.alarm(1) # emergency fallback @@ -118,6 +120,7 @@ def wait(want_token): if _fds and want_token: rfds.append(_fds[0]) assert(rfds) + assert(state.is_flushed()) r,w,x = select.select(rfds, [], []) _debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, r)) for fd in r: @@ -147,6 +150,7 @@ def has_token(): def get_token(reason): + assert(state.is_flushed()) global _mytokens assert(_mytokens <= 1) setup(1) @@ -179,6 +183,7 @@ def running(): def wait_all(): _debug("wait_all\n") + assert(state.is_flushed()) while running(): while _mytokens >= 1: release_mine() @@ -207,6 +212,7 @@ def force_return_tokens(): del _waitfds[k] if _fds: _release(n) + assert(state.is_flushed()) def _pre_job(r, w, pfn): @@ -227,6 +233,7 @@ class Job: def start_job(reason, jobfunc, donefunc): + assert(state.is_flushed()) global _mytokens assert(_mytokens <= 1) get_token(reason) diff --git a/redo-ifchange.py b/redo-ifchange.py index 5cb92fb..c1345c6 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -32,9 +32,13 @@ try: for t in targets: f.add_dep('m', t) f.save() + state.commit() rv = builder.main(targets, should_build) finally: - jwack.force_return_tokens() + try: + state.rollback() + finally: + jwack.force_return_tokens() except KeyboardInterrupt: sys.exit(200) state.commit() diff --git a/redo.py b/redo.py index 1298d0e..d6a40fe 100755 --- a/redo.py +++ b/redo.py @@ -62,9 +62,14 @@ try: err('invalid --jobs value: %r\n' % opt.jobs) jwack.setup(j) try: + assert(state.is_flushed()) retcode = builder.main(targets, lambda t: True) + assert(state.is_flushed()) finally: - jwack.force_return_tokens() + try: + state.rollback() + finally: + jwack.force_return_tokens() sys.exit(retcode) except KeyboardInterrupt: sys.exit(200) diff --git a/state.py b/state.py index 08ce5dd..a321448 100644 --- a/state.py +++ b/state.py @@ -108,6 +108,19 @@ def commit(): _wrote = 0 +def rollback(): + if _insane: + return + global _wrote + if _wrote: + db().rollback() + _wrote = 0 + + +def is_flushed(): + return not _wrote + + _insane = None def check_sane(): global _insane, _writable