apenwarr-redo/redo/builder.py

605 lines
24 KiB
Python
Raw Normal View History

"""Code for parallel-building a set of targets, if needed."""
import errno, os, stat, signal, sys, tempfile, time
from . import cycles, env, jobserver, logs, state, paths
from .helpers import unlink, close_on_exec
from .logs import debug2, err, warn, meta
2010-11-19 07:21:09 -08:00
def _nice(t):
return state.relpath(t, env.v.STARTDIR)
def _try_stat(filename):
try:
return os.lstat(filename)
except OSError, e:
if e.errno == errno.ENOENT:
return None
else:
raise
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
log_reader_pid = None
def close_stdin():
f = open('/dev/null')
os.dup2(f.fileno(), 0)
f.close()
def start_stdin_log_reader(status, details, pretty, color,
debug_locks, debug_pids):
"""Redirect stderr to a redo-log instance with the given options.
Then we automatically run logs.setup() to send the right data format
to that redo-log instance.
After this, be sure to run await_log_reader() before exiting.
"""
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
global log_reader_pid
r, w = os.pipe() # main pipe to redo-log
ar, aw = os.pipe() # ack pipe from redo-log --ack-fd
sys.stdout.flush()
sys.stderr.flush()
pid = os.fork()
if pid:
# parent
log_reader_pid = pid
os.close(r)
os.close(aw)
b = os.read(ar, 8)
if not b:
# subprocess died without sending us anything: that's bad.
err('failed to start redo-log subprocess; cannot continue.\n')
os._exit(99)
assert b == 'REDO-OK\n'
# now we know the subproc is running and will report our errors
# to stderr, so it's okay to lose our own stderr.
os.close(ar)
os.dup2(w, 1)
os.dup2(w, 2)
os.close(w)
Workaround for completely broken file locking on Windows 10 WSL. WSL (Windows Services for Linux) provides a Linux-kernel-compatible ABI for userspace processes, but the current version doesn't not implement fcntl() locks at all; it just always returns success. See https://github.com/Microsoft/WSL/issues/1927. This causes us three kinds of problem: 1. sqlite3 in WAL mode gives "OperationalError: locking protocol". 1b. Other sqlite3 journal modes also don't work when used by multiple processes. 2. redo parallelism doesn't work, because we can't prevent the same target from being build several times simultaneously. 3. "redo-log -f" doesn't work, since it can't tell whether the log file it's tailing is "done" or not. To fix #1, we switch the sqlite3 journal back to PERSIST instead of WAL. We originally changed to WAL in commit 5156feae9d to reduce deadlocks on MacOS. That was never adequately explained, but PERSIST still acts weird on MacOS, so we'll only switch to PERSIST when we detect that locking is definitely broken. Sigh. To (mostly) fix #2, we disable any -j value > 1 when locking is broken. This prevents basic forms of parallelism, but doesn't stop you from re-entrantly starting other instances of redo. To fix that properly, we need to switch to a different locking mechanism entirely, which is tough in python. flock() locks probably work, for example, but python's locks lie and just use fcntl locks for those. To fix #3, we always force --no-log mode when we find that locking is broken.
2019-01-02 14:18:51 -05:00
logs.setup(tty=sys.stderr, parent_logs=True, pretty=False, color=False)
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
else:
# child
try:
os.close(ar)
os.close(w)
os.dup2(r, 0)
os.close(r)
# redo-log sends to stdout (because if you ask for logs, that's
# the output you wanted!). But redo itself sends logs to stderr
# (because they're incidental to the thing you asked for).
# To make these semantics work, we point redo-log's stdout at
# our stderr when we launch it.
os.dup2(2, 1)
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
argv = [
'redo-log',
'--recursive', '--follow',
'--ack-fd', str(aw),
('--status' if status and os.isatty(2) else '--no-status'),
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
('--details' if details else '--no-details'),
('--pretty' if pretty else '--no-pretty'),
('--debug-locks' if debug_locks else '--no-debug-locks'),
('--debug-pids' if debug_pids else '--no-debug-pids'),
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
]
if color != 1:
argv.append('--color' if color >= 2 else '--no-color')
argv.append('-')
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
os.execvp(argv[0], argv)
except Exception, e: # pylint: disable=broad-except
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
sys.stderr.write('redo-log: exec: %s\n' % e)
finally:
os._exit(99)
def await_log_reader():
"""Await the redo-log instance we redirected stderr to, if any."""
if not env.v.LOG:
return
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
if log_reader_pid > 0:
# never actually close fd#1 or fd#2; insanity awaits.
# replace it with something else instead.
# Since our stdout/stderr are attached to redo-log's stdin,
# this will notify redo-log that it's time to die (after it finishes
# reading the logs)
out = open('/dev/tty', 'w')
os.dup2(out.fileno(), 1)
os.dup2(out.fileno(), 2)
os.waitpid(log_reader_pid, 0)
class ImmediateReturn(Exception):
def __init__(self, rv):
Exception.__init__(self, "immediate return with exit code %d" % rv)
self.rv = rv
class _BuildJob(object):
def __init__(self, t, sf, lock, shouldbuildfunc, donefunc):
self.t = t # original target name, not relative to env.v.BASE
self.sf = sf
self.tmpname = None
self.lock = lock
self.shouldbuildfunc = shouldbuildfunc
self.donefunc = donefunc
self.before_t = _try_stat(self.t)
# attributes of the running process
self.outfile = None
def start(self):
"""Actually start running this job in a subproc, if needed."""
assert self.lock.owned
try:
try:
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
is_target, dirty = self.shouldbuildfunc(self.t)
except cycles.CyclicDependencyError:
err('cyclic dependency while checking %s\n' % _nice(self.t))
raise ImmediateReturn(208)
if not dirty:
# target doesn't need to be built; skip the whole task
if is_target:
meta('unchanged', state.target_relpath(self.t))
return self._finalize(0)
except ImmediateReturn, e:
return self._finalize(e.rv)
if env.v.NO_OOB or dirty == True: # pylint: disable=singleton-comparison
self._start_self()
else:
self._start_deps_unlocked(dirty)
def _start_self(self):
"""Run jobserver.start() to build this object's target file."""
assert self.lock.owned
t = self.t
sf = self.sf
newstamp = sf.read_stamp()
if (sf.is_generated and
newstamp != state.STAMP_MISSING and
(sf.is_override or state.detect_override(sf.stamp, newstamp))):
state.warn_override(_nice(t))
if not sf.is_override:
warn('%s - old: %r\n' % (_nice(t), sf.stamp))
warn('%s - new: %r\n' % (_nice(t), newstamp))
sf.set_override()
sf.save()
# fall through and treat it the same as a static file
if (os.path.exists(t) and not os.path.isdir(t + '/.')
and (sf.is_override or not sf.is_generated)):
# an existing source file that was not generated by us.
# This step is mentioned by djb in his notes.
# For example, a rule called default.c.do could be used to try
# to produce hello.c, but we don't want that to happen if
# hello.c was created by the end user.
debug2("-- static (%r)\n" % t)
sf.set_static()
sf.save()
return self._finalize(0)
sf.zap_deps1()
(dodir, dofile, _, basename, ext) = paths.find_do_file(sf)
if not dofile:
if os.path.exists(t):
sf.set_static()
sf.save()
return self._finalize(0)
else:
err('no rule to redo %r\n' % t)
return self._finalize(1)
# There is no good place for us to pre-create a temp file for
# stdout. The target dir might not exist yet, or it might currently
# exist but get wiped by the .do script. Other dirs, like the one
# containing the .do file, might be mounted readonly. We can put it
# in the system temp dir, but then we can't necessarily rename it to
# the target filename because it might cross filesystem boundaries.
# Also, if redo is interrupted, it would leave a temp file lying
# around. To avoid all this, use mkstemp() to create a temp file
# wherever it wants to, and immediately unlink it, but keep a file
# handle open. When the .do script finishes, we can copy the
# content out of that nameless file handle into a file in the same
# dir as the target (which by definition must now exist, if you
# wanted the target to exist).
#
# On the other hand, the $3 temp filename can be hardcoded to be in
# the target directory, even if that directory does not exist.
# It's not *redo*'s job to create that file. The .do file will
# create it, if it wants, and it's the .do file's job to first ensure
# that the directory exists.
tmpbase = os.path.join(dodir, basename + ext)
self.tmpname = tmpbase + '.redo.tmp'
unlink(self.tmpname)
ffd, fname = tempfile.mkstemp(prefix='redo.', suffix='.tmp')
close_on_exec(ffd, True)
os.unlink(fname)
self.outfile = os.fdopen(ffd, 'w+')
# this will run in the dofile's directory, so use only basenames here
2016-11-27 12:29:43 -08:00
arg1 = basename + ext # target name (including extension)
arg2 = basename # target name (without extension)
argv = ['sh', '-e',
dofile,
arg1,
arg2,
# $3 temp output file name
state.relpath(os.path.abspath(self.tmpname), dodir),
]
if env.v.VERBOSE:
argv[1] += 'v'
if env.v.XTRACE:
argv[1] += 'x'
firstline = open(os.path.join(dodir, dofile)).readline().strip()
if firstline.startswith('#!/'):
argv[0:2] = firstline[2:].split(' ')
# make sure to create the logfile *before* writing the meta() about it.
redo-log: capture and linearize the output of redo builds. redo now saves the stderr from every .do script, for every target, into a file in the .redo directory. That means you can look up the logs from the most recent build of any target using the new redo-log command, for example: redo-log -r all The default is to show logs non-recursively, that is, it'll show when a target does redo-ifchange on another target, but it won't recurse into the logs for the latter target. With -r (recursive), it does. With -u (unchanged), it does even if redo-ifchange discovered that the target was already up-to-date; in that case, it prints the logs of the *most recent* time the target was generated. With --no-details, redo-log will show only the 'redo' lines, not the other log messages. For very noisy build systems (like recursing into a 'make' instance) this can be helpful to get an overview of what happened, without all the cruft. You can use the -f (follow) option like tail -f, to follow a build that's currently in progress until it finishes. redo itself spins up a copy of redo-log -r -f while it runs, so you can see what's going on. Still broken in this version: - No man page or new tests yet. - ANSI colors don't yet work (unless you use --raw-logs, which gives the old-style behaviour). - You can't redirect the output of a sub-redo to a file or a pipe right now, because redo-log is eating it. - The regex for matching 'redo' lines in the log is very gross. Instead, we should put the raw log files in a more machine-parseable format, and redo-log should turn that into human-readable format. - redo-log tries to "linearize" the logs, which makes them comprehensible even for a large parallel build. It recursively shows log messages for each target in depth-first tree order (by tracing into a new target every time it sees a 'redo' line). This works really well, but in some specific cases, the "topmost" redo instance can get stuck waiting for a jwack token, which makes it look like the whole build has stalled, when really redo-log is just waiting a long time for a particular subprocess to be able to continue. We'll need to add a specific workaround for that.
2018-11-03 22:09:18 -04:00
# that way redo-log won't trace into an obsolete logfile.
if env.v.LOG:
open(state.logname(self.sf.id), 'w')
dof = state.File(name=os.path.join(dodir, dofile))
dof.set_static()
dof.save()
state.commit()
meta('do', state.target_relpath(t))
def call_subproc():
self._subproc(dodir, basename, ext, argv)
def call_exited(t, rv):
self._subproc_exited(t, rv, argv)
jobserver.start(t, call_subproc, call_exited)
def _start_deps_unlocked(self, dirty):
"""Run jobserver.start to build objects needed to check deps.
Out-of-band redo of some sub-objects. This happens when we're not
quite sure if t needs to be built or not (because some children
look dirty, but might turn out to be clean thanks to redo-stamp
checksums). We have to call redo-unlocked to figure it all out.
Note: redo-unlocked will handle all the updating of sf, so we don't
have to do it here, nor call _record_new_state. However, we have to
hold onto the lock because otherwise we would introduce a race
condition; that's why it's called redo-unlocked, because it doesn't
grab a lock.
"""
# FIXME: redo-unlocked is kind of a weird hack.
# Maybe we should just start jobs to build the necessary deps
# directly from this process, and when done, reconsider building
# the target we started with. But that makes this one process's
# build recursive, where currently it's flat.
here = os.getcwd()
def _fix(p):
return state.relpath(os.path.join(env.v.BASE, p), here)
argv = (['redo-unlocked', _fix(self.sf.name)] +
list(set(_fix(d.name) for d in dirty)))
meta('check', state.target_relpath(self.t))
state.commit()
def subtask():
os.environ['REDO_DEPTH'] = env.v.DEPTH + ' '
# python ignores SIGPIPE
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
os.execvp(argv[0], argv)
assert 0
# returns only if there's an exception
def job_exited(t, rv):
return self._finalize(rv)
jobserver.start(self.t, jobfunc=subtask, donefunc=job_exited)
def _subproc(self, dodir, basename, ext, argv):
"""The function by jobserver.start to exec the build script.
This is run in the *child* process.
"""
# careful: REDO_PWD was the PWD relative to the STARTPATH at the time
# we *started* building the current target; but that target ran
# redo-ifchange, and it might have done it from a different directory
# than we started it in. So os.getcwd() might be != REDO_PWD right
# now.
assert state.is_flushed()
newp = os.path.realpath(dodir)
os.environ['REDO_PWD'] = state.relpath(newp, env.v.STARTDIR)
os.environ['REDO_TARGET'] = basename + ext
os.environ['REDO_DEPTH'] = env.v.DEPTH + ' '
cycles.add(self.lock.fid)
if dodir:
os.chdir(dodir)
os.dup2(self.outfile.fileno(), 1)
os.close(self.outfile.fileno())
close_on_exec(1, False)
if env.v.LOG:
cur_inode = str(os.fstat(2).st_ino)
if not env.v.LOG_INODE or cur_inode == env.v.LOG_INODE:
# .do script has *not* redirected stderr, which means we're
# using redo-log's log saving mode. That means subprocs
# should be logged to their own file. If the .do script
# *does* redirect stderr, that redirection should be inherited
# by subprocs, so we'd do nothing.
logf = open(state.logname(self.sf.id), 'w')
Workaround for completely broken file locking on Windows 10 WSL. WSL (Windows Services for Linux) provides a Linux-kernel-compatible ABI for userspace processes, but the current version doesn't not implement fcntl() locks at all; it just always returns success. See https://github.com/Microsoft/WSL/issues/1927. This causes us three kinds of problem: 1. sqlite3 in WAL mode gives "OperationalError: locking protocol". 1b. Other sqlite3 journal modes also don't work when used by multiple processes. 2. redo parallelism doesn't work, because we can't prevent the same target from being build several times simultaneously. 3. "redo-log -f" doesn't work, since it can't tell whether the log file it's tailing is "done" or not. To fix #1, we switch the sqlite3 journal back to PERSIST instead of WAL. We originally changed to WAL in commit 5156feae9d to reduce deadlocks on MacOS. That was never adequately explained, but PERSIST still acts weird on MacOS, so we'll only switch to PERSIST when we detect that locking is definitely broken. Sigh. To (mostly) fix #2, we disable any -j value > 1 when locking is broken. This prevents basic forms of parallelism, but doesn't stop you from re-entrantly starting other instances of redo. To fix that properly, we need to switch to a different locking mechanism entirely, which is tough in python. flock() locks probably work, for example, but python's locks lie and just use fcntl locks for those. To fix #3, we always force --no-log mode when we find that locking is broken.
2019-01-02 14:18:51 -05:00
new_inode = os.fstat(logf.fileno()).st_ino
os.environ['REDO_LOG'] = '1' # .do files can check this
Workaround for completely broken file locking on Windows 10 WSL. WSL (Windows Services for Linux) provides a Linux-kernel-compatible ABI for userspace processes, but the current version doesn't not implement fcntl() locks at all; it just always returns success. See https://github.com/Microsoft/WSL/issues/1927. This causes us three kinds of problem: 1. sqlite3 in WAL mode gives "OperationalError: locking protocol". 1b. Other sqlite3 journal modes also don't work when used by multiple processes. 2. redo parallelism doesn't work, because we can't prevent the same target from being build several times simultaneously. 3. "redo-log -f" doesn't work, since it can't tell whether the log file it's tailing is "done" or not. To fix #1, we switch the sqlite3 journal back to PERSIST instead of WAL. We originally changed to WAL in commit 5156feae9d to reduce deadlocks on MacOS. That was never adequately explained, but PERSIST still acts weird on MacOS, so we'll only switch to PERSIST when we detect that locking is definitely broken. Sigh. To (mostly) fix #2, we disable any -j value > 1 when locking is broken. This prevents basic forms of parallelism, but doesn't stop you from re-entrantly starting other instances of redo. To fix that properly, we need to switch to a different locking mechanism entirely, which is tough in python. flock() locks probably work, for example, but python's locks lie and just use fcntl locks for those. To fix #3, we always force --no-log mode when we find that locking is broken.
2019-01-02 14:18:51 -05:00
os.environ['REDO_LOG_INODE'] = str(new_inode)
os.dup2(logf.fileno(), 2)
close_on_exec(2, False)
logf.close()
else:
if 'REDO_LOG_INODE' in os.environ:
del os.environ['REDO_LOG_INODE']
os.environ['REDO_LOG'] = ''
signal.signal(signal.SIGPIPE, signal.SIG_DFL) # python ignores SIGPIPE
if env.v.VERBOSE or env.v.XTRACE:
logs.write('* %s' % ' '.join(argv).replace('\n', ' '))
os.execvp(argv[0], argv)
# FIXME: it would be nice to log the exit code to logf.
# But that would have to happen in the parent process, which doesn't
# have logf open.
assert 0
# returns only if there's an exception
def _subproc_exited(self, t, rv, argv):
"""Called by the jobserver when our subtask exits.
This is run in the *parent* process.
"""
try:
state.check_sane()
rv = self._record_new_state(t, rv, argv)
state.commit()
finally:
self._finalize(rv)
def _record_new_state(self, t, rv, argv):
"""After a subtask finishes, handle its changes to the output file.
This is run in the *parent* process.
This includes renaming temp files into place and detecting mistakes
(like writing directly to $1 instead of $3). We also have to record
the new file stamp data for the completed target.
"""
outfile = self.outfile
before_t = self.before_t
after_t = _try_stat(t)
st1 = os.fstat(outfile.fileno())
st2 = _try_stat(self.tmpname)
if (after_t and
(not before_t or before_t.st_mtime != after_t.st_mtime) and
not stat.S_ISDIR(after_t.st_mode)):
err('%s modified %s directly!\n' % (argv[2], t))
err('...you should update $3 (a temp file) or stdout, not $1.\n')
rv = 206
elif st2 and st1.st_size > 0:
err('%s wrote to stdout *and* created $3.\n' % argv[2])
err('...you should write status messages to stderr, not stdout.\n')
rv = 207
if rv == 0:
# FIXME: race condition here between updating stamp/is_generated
# and actually renaming the files into place. There needs to
# be some kind of two-stage commit, I guess.
if st1.st_size > 0 and not st2:
# script wrote to stdout. Copy its contents to the tmpfile.
unlink(self.tmpname)
try:
newf = open(self.tmpname, 'w')
except IOError, e:
dnt = os.path.dirname(os.path.abspath(t))
if not os.path.exists(dnt):
# This could happen, so report a simple error message
# that gives a hint for how to fix your .do script.
err('%s: target dir %r does not exist!\n' % (t, dnt))
else:
# This could happen for, eg. a permissions error on
# the target directory.
err('%s: copy stdout: %s\n' % (t, e))
rv = 209
else:
self.outfile.seek(0)
while 1:
b = self.outfile.read(1024*1024)
if not b:
break
newf.write(b)
newf.close()
st2 = _try_stat(self.tmpname)
if st2:
# either $3 file was created *or* stdout was written to.
# therefore tmpfile now exists.
try:
# Atomically replace the target file
os.rename(self.tmpname, t)
except OSError, e:
# This could happen for, eg. a permissions error on
# the target directory.
err('%s: rename %s: %s\n' % (t, self.tmpname, e))
rv = 209
else: # no output generated at all; that's ok
unlink(t)
sf = self.sf
sf.refresh()
sf.is_generated = True
sf.is_override = False
if sf.is_checked() or sf.is_changed():
# it got checked during the run; someone ran redo-stamp.
# update_stamp would call set_changed(); we don't want that,
# so only use read_stamp.
sf.stamp = sf.read_stamp()
else:
sf.csum = None
sf.update_stamp()
sf.set_changed()
else:
unlink(self.tmpname)
sf = self.sf
sf.set_failed()
sf.zap_deps2()
sf.save()
outfile.close()
meta('done', '%d %s' % (rv, state.target_relpath(self.t)))
return rv
def _finalize(self, rv):
"""After a target is built, report completion and unlock.
This is run in the *parent* process.
Note: you usually need to call _record_new_state() first.
"""
try:
self.donefunc(self.t, rv)
assert self.lock.owned
finally:
self.lock.unlock()
def run(targets, shouldbuildfunc):
"""Build the given list of targets, if necessary.
Builds in parallel using whatever jobserver tokens can be obtained.
Args:
targets: a list of target filenames to consider building.
shouldbuildfunc: a function(target) which determines whether the given
target needs to be built, as of the time it is called.
Returns:
0 if all necessary targets returned exit code zero; nonzero otherwise.
"""
2010-11-19 07:21:09 -08:00
retcode = [0] # a list so that it can be reassigned from done()
if env.v.SHUFFLE:
import random
2010-11-19 07:21:09 -08:00
random.shuffle(targets)
locked = []
def job_exited(t, rv):
2010-11-19 07:21:09 -08:00
if rv:
retcode[0] = 1
if env.v.TARGET and not env.v.UNLOCKED:
me = os.path.join(env.v.STARTDIR,
os.path.join(env.v.PWD, env.v.TARGET))
myfile = state.File(name=me)
selflock = state.Lock(state.LOG_LOCK_MAGIC + myfile.id)
else:
selflock = myfile = me = None
def cheat():
if not selflock:
return 0
selflock.trylock()
if not selflock.owned:
# redo-log already owns it: let's cheat.
# Give ourselves one extra token so that the "foreground" log
# can always make progress.
return 1
else:
# redo-log isn't watching us (yet)
selflock.unlock()
return 0
2010-11-19 07:21:09 -08:00
# In the first cycle, we just build as much as we can without worrying
# about any lock contention. If someone else has it locked, we move on.
seen = {}
lock = None
2010-11-19 07:21:09 -08:00
for t in targets:
if not t:
err('cannot build the empty target ("").\n')
retcode[0] = 204
break
assert state.is_flushed()
if t in seen:
continue
seen[t] = 1
if not jobserver.has_token():
state.commit()
jobserver.ensure_token_or_cheat(t, cheat)
if retcode[0] and not env.v.KEEP_GOING:
break
if not state.check_sane():
err('.redo directory disappeared; cannot continue.\n')
retcode[0] = 205
break
f = state.File(name=t)
lock = state.Lock(f.id)
if env.v.UNLOCKED:
lock.owned = True
else:
lock.trylock()
2010-11-19 07:21:09 -08:00
if not lock.owned:
meta('locked', state.target_relpath(t))
locked.append((f.id, t, f.name))
2010-11-19 07:21:09 -08:00
else:
# We had to create f before we had a lock, because we need f.id
# to make the lock. But someone may have updated the state
# between then and now.
# FIXME: separate obtaining the fid from creating the File.
# FIXME: maybe integrate locking into the File object?
f.refresh()
_BuildJob(t, f, lock,
shouldbuildfunc=shouldbuildfunc,
donefunc=job_exited).start()
state.commit()
assert state.is_flushed()
lock = None
del lock
# Now we've built all the "easy" ones. Go back and just wait on the
# remaining ones one by one. There's no reason to do it any more
# efficiently, because if these targets were previously locked, that
# means someone else was building them; thus, we probably won't need to
# do anything. The only exception is if we're invoked as redo instead
# of redo-ifchange; then we have to redo it even if someone else already
# did. But that should be rare.
while locked or jobserver.running():
state.commit()
jobserver.wait_all()
assert jobserver._mytokens <= 1 # pylint: disable=protected-access
jobserver.ensure_token_or_cheat('self', cheat)
# at this point, we don't have any children holding any tokens, so
# it's okay to block below.
if retcode[0] and not env.v.KEEP_GOING:
break
2010-11-19 07:21:09 -08:00
if locked:
if not state.check_sane():
err('.redo directory disappeared; cannot continue.\n')
retcode[0] = 205
break
fid, t, _ = locked.pop(0)
lock = state.Lock(fid)
backoff = 0.01
lock.trylock()
while not lock.owned:
# Don't spin with 100% CPU while we fight for the lock.
import random
time.sleep(random.random() * min(backoff, 1.0))
backoff *= 2
# after printing this line, redo-log will recurse into t,
# whether it's us building it, or someone else.
meta('waiting', state.target_relpath(t))
try:
lock.check()
except cycles.CyclicDependencyError:
err('cyclic dependency while building %s\n' % _nice(t))
retcode[0] = 208
return retcode[0]
# this sequence looks a little silly, but the idea is to
# give up our personal token while we wait for the lock to
# be released; but we should never run ensure_token() while
# holding a lock, or we could cause deadlocks.
jobserver.release_mine()
lock.waitlock()
# now t is definitely free, so we get to decide whether
# to build it.
lock.unlock()
jobserver.ensure_token_or_cheat(t, cheat)
lock.trylock()
assert lock.owned
meta('unlocked', state.target_relpath(t))
if state.File(name=t).is_failed():
err('%s: failed in another thread\n' % _nice(t))
2010-11-19 07:21:09 -08:00
retcode[0] = 2
lock.unlock()
else:
_BuildJob(t, state.File(fid=fid), lock,
shouldbuildfunc=shouldbuildfunc,
donefunc=job_exited).start()
lock = None
state.commit()
2010-11-19 07:21:09 -08:00
return retcode[0]