Some renaming and comments to try to clarify builder and jobserver.

The code is still a bit spaghetti-like, especialy when it comes to
redo-unlocked, but at least the new names are slightly more
comprehensible.
This commit is contained in:
Avery Pennarun 2018-12-11 02:57:29 +00:00
commit 2b4fe812e2
4 changed files with 189 additions and 70 deletions

View file

@ -1,3 +1,4 @@
"""Code for parallel-building a set of targets, if needed."""
import sys, os, errno, stat, signal, time
from . import cycles, env, jobserver, logs, state, paths
from .helpers import unlink, close_on_exec
@ -29,6 +30,13 @@ def close_stdin():
def start_stdin_log_reader(status, details, pretty, color,
debug_locks, debug_pids):
"""Redirect stderr to a redo-log instance with the given options.
Then we automatically run logs.setup() to send the right data format
to that redo-log instance.
After this, be sure to run await_log_reader() before exiting.
"""
global log_reader_pid
r, w = os.pipe() # main pipe to redo-log
ar, aw = os.pipe() # ack pipe from redo-log --ack-fd
@ -87,6 +95,7 @@ def start_stdin_log_reader(status, details, pretty, color,
def await_log_reader():
"""Await the redo-log instance we redirected stderr to, if any."""
if not env.v.LOG:
return
if log_reader_pid > 0:
@ -107,7 +116,7 @@ class ImmediateReturn(Exception):
self.rv = rv
class BuildJob(object):
class _BuildJob(object):
def __init__(self, t, sf, lock, shouldbuildfunc, donefunc):
self.t = t # original target name, not relative to env.v.BASE
self.sf = sf
@ -123,7 +132,11 @@ class BuildJob(object):
self.donefunc = donefunc
self.before_t = _try_stat(self.t)
# attributes of the running process
self.f = None
def start(self):
"""Actually start running this job in a subproc, if needed."""
assert self.lock.owned
try:
try:
@ -135,16 +148,17 @@ class BuildJob(object):
# target doesn't need to be built; skip the whole task
if is_target:
meta('unchanged', state.target_relpath(self.t))
return self._after2(0)
return self._finalize(0)
except ImmediateReturn, e:
return self._after2(e.rv)
return self._finalize(e.rv)
if env.v.NO_OOB or dirty == True: # pylint: disable=singleton-comparison
self._start_do()
self._start_self()
else:
self._start_unlocked(dirty)
self._start_deps_unlocked(dirty)
def _start_do(self):
def _start_self(self):
"""Run jobserver.start() to build this object's target file."""
assert self.lock.owned
t = self.t
sf = self.sf
@ -159,7 +173,7 @@ class BuildJob(object):
sf.set_override()
sf.set_checked()
sf.save()
return self._after2(0)
return self._finalize(0)
if (os.path.exists(t) and not os.path.isdir(t + '/.')
and not sf.is_generated):
# an existing source file that was not generated by us.
@ -170,22 +184,21 @@ class BuildJob(object):
debug2("-- static (%r)\n" % t)
sf.set_static()
sf.save()
return self._after2(0)
return self._finalize(0)
sf.zap_deps1()
(dodir, dofile, _, basename, ext) = paths.find_do_file(sf)
if not dofile:
if os.path.exists(t):
sf.set_static()
sf.save()
return self._after2(0)
return self._finalize(0)
else:
err('no rule to redo %r\n' % t)
return self._after2(1)
return self._finalize(1)
unlink(self.tmpname1)
unlink(self.tmpname2)
ffd = os.open(self.tmpname1, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666)
close_on_exec(ffd, True)
# pylint: disable=attribute-defined-outside-init
self.f = os.fdopen(ffd, 'w+')
# this will run in the dofile's directory, so use only basenames here
arg1 = basename + ext # target name (including extension)
@ -208,31 +221,36 @@ class BuildJob(object):
# that way redo-log won't trace into an obsolete logfile.
if env.v.LOG:
open(state.logname(self.sf.id), 'w')
# FIXME: put these variables somewhere else, instead of on-the-fly
# extending this class!
# pylint: disable=attribute-defined-outside-init
self.dodir = dodir
self.basename = basename
self.ext = ext
self.argv = argv
dof = state.File(name=os.path.join(dodir, dofile))
dof.set_static()
dof.save()
state.commit()
meta('do', state.target_relpath(t))
jobserver.start_job(t, self._do_subproc, self._after)
def call_subproc():
self._subproc(dodir, basename, ext, argv)
def call_exited(t, rv):
self._subproc_exited(t, rv, argv)
jobserver.start(t, call_subproc, call_exited)
def _start_unlocked(self, dirty):
# out-of-band redo of some sub-objects. This happens when we're not
# quite sure if t needs to be built or not (because some children
# look dirty, but might turn out to be clean thanks to checksums).
# We have to call redo-unlocked to figure it all out.
#
# Note: redo-unlocked will handle all the updating of sf, so we
# don't have to do it here, nor call _after1. However, we have to
# hold onto the lock because otherwise we would introduce a race
# condition; that's why it's called redo-unlocked, because it doesn't
# grab a lock.
def _start_deps_unlocked(self, dirty):
"""Run jobserver.start to build objects needed to check deps.
Out-of-band redo of some sub-objects. This happens when we're not
quite sure if t needs to be built or not (because some children
look dirty, but might turn out to be clean thanks to redo-stamp
checksums). We have to call redo-unlocked to figure it all out.
Note: redo-unlocked will handle all the updating of sf, so we don't
have to do it here, nor call _record_new_state. However, we have to
hold onto the lock because otherwise we would introduce a race
condition; that's why it's called redo-unlocked, because it doesn't
grab a lock.
"""
# FIXME: redo-unlocked is kind of a weird hack.
# Maybe we should just start jobs to build the necessary deps
# directly from this process, and when done, reconsider building
# the target we started with. But that makes this one process's
# build recursive, where currently it's flat.
here = os.getcwd()
def _fix(p):
return state.relpath(os.path.join(env.v.BASE, p), here)
@ -240,32 +258,35 @@ class BuildJob(object):
list(set(_fix(d.name) for d in dirty)))
meta('check', state.target_relpath(self.t))
state.commit()
def run():
def subtask():
os.environ['REDO_DEPTH'] = env.v.DEPTH + ' '
# python ignores SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
os.execvp(argv[0], argv)
assert 0
# returns only if there's an exception
def after(t, rv):
return self._after2(rv)
jobserver.start_job(self.t, run, after)
def job_exited(t, rv):
return self._finalize(rv)
jobserver.start(self.t, jobfunc=subtask, donefunc=job_exited)
def _do_subproc(self):
def _subproc(self, dodir, basename, ext, argv):
"""The function by jobserver.start to exec the build script.
This is run in the *child* process.
"""
# careful: REDO_PWD was the PWD relative to the STARTPATH at the time
# we *started* building the current target; but that target ran
# redo-ifchange, and it might have done it from a different directory
# than we started it in. So os.getcwd() might be != REDO_PWD right
# now.
assert state.is_flushed()
dn = self.dodir
newp = os.path.realpath(dn)
newp = os.path.realpath(dodir)
os.environ['REDO_PWD'] = state.relpath(newp, env.v.STARTDIR)
os.environ['REDO_TARGET'] = self.basename + self.ext
os.environ['REDO_TARGET'] = basename + ext
os.environ['REDO_DEPTH'] = env.v.DEPTH + ' '
cycles.add(self.lock.fid)
if dn:
os.chdir(dn)
if dodir:
os.chdir(dodir)
os.dup2(self.f.fileno(), 1)
os.close(self.f.fileno())
close_on_exec(1, False)
@ -290,23 +311,35 @@ class BuildJob(object):
os.environ['REDO_LOG'] = ''
signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE
if env.v.VERBOSE or env.v.XTRACE:
logs.write('* %s' % ' '.join(self.argv).replace('\n', ' '))
os.execvp(self.argv[0], self.argv)
logs.write('* %s' % ' '.join(argv).replace('\n', ' '))
os.execvp(argv[0], argv)
# FIXME: it would be nice to log the exit code to logf.
# But that would have to happen in the parent process, which doesn't
# have logf open.
assert 0
# returns only if there's an exception
def _after(self, t, rv):
def _subproc_exited(self, t, rv, argv):
"""Called by the jobserver when our subtask exits.
This is run in the *parent* process.
"""
try:
state.check_sane()
rv = self._after1(t, rv)
rv = self._record_new_state(t, rv, argv)
state.commit()
finally:
self._after2(rv)
self._finalize(rv)
def _after1(self, t, rv):
def _record_new_state(self, t, rv, argv):
"""After a subtask finishes, handle its changes to the output file.
This is run in the *parent* process.
This includes renaming temp files into place and detecting mistakes
(like writing directly to $1 instead of $3). We also have to record
the new file stamp data for the completed target.
"""
f = self.f
before_t = self.before_t
after_t = _try_stat(t)
@ -315,11 +348,11 @@ class BuildJob(object):
if (after_t and
(not before_t or before_t.st_mtime != after_t.st_mtime) and
not stat.S_ISDIR(after_t.st_mode)):
err('%s modified %s directly!\n' % (self.argv[2], t))
err('%s modified %s directly!\n' % (argv[2], t))
err('...you should update $3 (a temp file) or stdout, not $1.\n')
rv = 206
elif st2 and st1.st_size > 0:
err('%s wrote to stdout *and* created $3.\n' % self.argv[2])
err('%s wrote to stdout *and* created $3.\n' % argv[2])
err('...you should write status messages to stderr, not stdout.\n')
rv = 207
if rv == 0:
@ -345,7 +378,7 @@ class BuildJob(object):
unlink(t)
else:
err('%s: can\'t save stdout to %r: %s\n' %
(self.argv[2], t, e.strerror))
(argv[2], t, e.strerror))
rv = 1000
if st2:
os.unlink(self.tmpname2)
@ -376,7 +409,12 @@ class BuildJob(object):
meta('done', '%d %s' % (rv, state.target_relpath(self.t)))
return rv
def _after2(self, rv):
def _finalize(self, rv):
"""After a target is built, report completion and unlock.
This is run in the *parent* process.
Note: you usually need to call _record_new_state() first.
"""
try:
self.donefunc(self.t, rv)
assert self.lock.owned
@ -384,7 +422,19 @@ class BuildJob(object):
self.lock.unlock()
def main(targets, shouldbuildfunc):
def run(targets, shouldbuildfunc):
"""Build the given list of targets, if necessary.
Builds in parallel using whatever jobserver tokens can be obtained.
Args:
targets: a list of target filenames to consider building.
shouldbuildfunc: a function(target) which determines whether the given
target needs to be built, as of the time it is called.
Returns:
0 if all necessary targets returned exit code zero; nonzero otherwise.
"""
retcode = [0] # a list so that it can be reassigned from done()
if env.v.SHUFFLE:
import random
@ -392,7 +442,7 @@ def main(targets, shouldbuildfunc):
locked = []
def done(t, rv):
def job_exited(t, rv):
if rv:
retcode[0] = 1
@ -456,7 +506,9 @@ def main(targets, shouldbuildfunc):
# FIXME: separate obtaining the fid from creating the File.
# FIXME: maybe integrate locking into the File object?
f.refresh()
BuildJob(t, f, lock, shouldbuildfunc, done).start()
_BuildJob(t, f, lock,
shouldbuildfunc=shouldbuildfunc,
donefunc=job_exited).start()
state.commit()
assert state.is_flushed()
lock = None
@ -520,8 +572,9 @@ def main(targets, shouldbuildfunc):
retcode[0] = 2
lock.unlock()
else:
BuildJob(t, state.File(fid=fid), lock,
shouldbuildfunc, done).start()
_BuildJob(t, state.File(fid=fid), lock,
shouldbuildfunc=shouldbuildfunc,
donefunc=job_exited).start()
lock = None
state.commit()
return retcode[0]