Add more assertions about uncommitted sqlite transactions.

I think we were sometimes leaving half-done sqlite transactions sitting
around for a long time (e.g. across sub-calls to .do files).  This
seemed to be okay on Linux, but caused sqlite deadlocks on macOS.  Most
likely the cause is not the operating system itself, but the sqlite
version and journal mode in use.

In any case, the correct thing to do is to actually commit or rollback
transactions, not leave them hanging around.

Unfortunately, this doesn't actually fix my macOS deadlocks, which
makes me rather nervous.
This commit is contained in:
Avery Pennarun 2018-10-06 04:36:24 -04:00
commit 613625b580
5 changed files with 35 additions and 2 deletions

View file

@ -203,6 +203,7 @@ class BuildJob:
# redo-ifchange, and it might have done it from a different directory # redo-ifchange, and it might have done it from a different directory
# than we started it in. So os.getcwd() might be != REDO_PWD right # than we started it in. So os.getcwd() might be != REDO_PWD right
# now. # now.
assert(state.is_flushed())
dn = self.dodir dn = self.dodir
newp = os.path.realpath(dn) newp = os.path.realpath(dn)
os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR) os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR)
@ -319,6 +320,7 @@ def main(targets, shouldbuildfunc):
seen = {} seen = {}
lock = None lock = None
for t in targets: for t in targets:
assert(state.is_flushed())
if t in seen: if t in seen:
continue continue
seen[t] = 1 seen[t] = 1
@ -343,6 +345,8 @@ def main(targets, shouldbuildfunc):
locked.append((f.id,t)) locked.append((f.id,t))
else: else:
BuildJob(t, f, lock, shouldbuildfunc, done).start() BuildJob(t, f, lock, shouldbuildfunc, done).start()
state.commit()
assert(state.is_flushed())
del lock del lock

View file

@ -3,6 +3,7 @@
# #
import sys, os, errno, select, fcntl, signal import sys, os, errno, select, fcntl, signal
from helpers import atoi, close_on_exec from helpers import atoi, close_on_exec
import state
_toplevel = 0 _toplevel = 0
_mytokens = 1 _mytokens = 1
@ -54,6 +55,7 @@ def _try_read(fd, n):
return '' # try again return '' # try again
# ok, the socket is readable - but some other process might get there # ok, the socket is readable - but some other process might get there
# first. We have to set an alarm() in case our read() gets stuck. # first. We have to set an alarm() in case our read() gets stuck.
assert(state.is_flushed())
oldh = signal.signal(signal.SIGALRM, _timeout) oldh = signal.signal(signal.SIGALRM, _timeout)
try: try:
signal.alarm(1) # emergency fallback signal.alarm(1) # emergency fallback
@ -118,6 +120,7 @@ def wait(want_token):
if _fds and want_token: if _fds and want_token:
rfds.append(_fds[0]) rfds.append(_fds[0])
assert(rfds) assert(rfds)
assert(state.is_flushed())
r,w,x = select.select(rfds, [], []) r,w,x = select.select(rfds, [], [])
_debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, r)) _debug('_fds=%r; wfds=%r; readable: %r\n' % (_fds, _waitfds, r))
for fd in r: for fd in r:
@ -147,6 +150,7 @@ def has_token():
def get_token(reason): def get_token(reason):
assert(state.is_flushed())
global _mytokens global _mytokens
assert(_mytokens <= 1) assert(_mytokens <= 1)
setup(1) setup(1)
@ -179,6 +183,7 @@ def running():
def wait_all(): def wait_all():
_debug("wait_all\n") _debug("wait_all\n")
assert(state.is_flushed())
while running(): while running():
while _mytokens >= 1: while _mytokens >= 1:
release_mine() release_mine()
@ -207,6 +212,7 @@ def force_return_tokens():
del _waitfds[k] del _waitfds[k]
if _fds: if _fds:
_release(n) _release(n)
assert(state.is_flushed())
def _pre_job(r, w, pfn): def _pre_job(r, w, pfn):
@ -227,6 +233,7 @@ class Job:
def start_job(reason, jobfunc, donefunc): def start_job(reason, jobfunc, donefunc):
assert(state.is_flushed())
global _mytokens global _mytokens
assert(_mytokens <= 1) assert(_mytokens <= 1)
get_token(reason) get_token(reason)

View file

@ -32,7 +32,11 @@ try:
for t in targets: for t in targets:
f.add_dep('m', t) f.add_dep('m', t)
f.save() f.save()
state.commit()
rv = builder.main(targets, should_build) rv = builder.main(targets, should_build)
finally:
try:
state.rollback()
finally: finally:
jwack.force_return_tokens() jwack.force_return_tokens()
except KeyboardInterrupt: except KeyboardInterrupt:

View file

@ -62,7 +62,12 @@ try:
err('invalid --jobs value: %r\n' % opt.jobs) err('invalid --jobs value: %r\n' % opt.jobs)
jwack.setup(j) jwack.setup(j)
try: try:
assert(state.is_flushed())
retcode = builder.main(targets, lambda t: True) retcode = builder.main(targets, lambda t: True)
assert(state.is_flushed())
finally:
try:
state.rollback()
finally: finally:
jwack.force_return_tokens() jwack.force_return_tokens()
sys.exit(retcode) sys.exit(retcode)

View file

@ -108,6 +108,19 @@ def commit():
_wrote = 0 _wrote = 0
def rollback():
    """Abandon any uncommitted writes tracked by this module.

    Does nothing when `_insane` is set or when `_wrote` is falsy.
    Otherwise calls rollback() on the object returned by db() and
    resets `_wrote` to 0 so that is_flushed() reports true again.
    """
    global _wrote
    # Nothing to undo: either the state is flagged insane or no write
    # has been recorded.
    if _insane or not _wrote:
        return
    db().rollback()
    _wrote = 0
def is_flushed():
    """Return True when no write is pending, i.e. `_wrote` is falsy."""
    return False if _wrote else True
_insane = None _insane = None
def check_sane(): def check_sane():
global _insane, _writable global _insane, _writable