Use mkstemp() to create the stdout temp file, and simplify $3 path.

Previously, we'd try to put the stdout temp file in the same dir as the
target, if that dir exists.  Otherwise we'd walk up the directory tree
looking for a good place.  But this would go wrong if the directory we
chose got *deleted* during the run of the .do file.

Instead, we switch to an entirely new design: we use mkstemp() to
generate a temp file in the standard temp file location (probably
/tmp), then open it and immediately delete it, so the .do file can't
cause any unexpected behaviour.  After the .do file exits, we use our
still-open fd to the stdout file to read the content back out.

In the old implementation, we also put the $3 in the "adjusted"
location that depended whether the target dir already existed, just for
consistency.  But that was never necessary: we didn't create the $3
file, and if the .do script wants to write to $3, it should create the
target dir first anyway.  So change it to *always* use a $3 temp
filename in the target dir, which is much simpler and so has fewer edge
cases.

Add t/202-del/deltest4 with some tests for all these edge cases.

Reported-by: Jeff Stearns <jeff.stearns@gmail.com>
This commit is contained in:
Avery Pennarun 2018-12-12 03:37:44 +00:00
commit d95277d121
10 changed files with 177 additions and 119 deletions

View file

@ -4,7 +4,7 @@
# For the full version, visit http://github.com/apenwarr/redo
#
# The author disclaims copyright to this source file and hereby places it in
# the public domain. (2010 12 14; updated 2018 12 11)
# the public domain. (2010 12 14; updated 2018 12 13)
#
USAGE="
usage: do [-d] [-x] [-v] [-c] <targets...>
@ -77,6 +77,7 @@ if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then
if [ "$#" -eq 0 ] && [ "$_cmd" = "do" -o "$_cmd" = "redo" ]; then
set all # only toplevel redo has a default target
fi
export DO_STARTDIR="$PWD"
export DO_BUILT="$PWD/.do_built"
if [ -z "$_do_opt_clean" -a -e "$DO_BUILT" ]; then
echo "do: Incremental mode. Use -c for clean rebuild." >&2
@ -89,7 +90,7 @@ if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then
done <"$DO_BUILT.new" |
xargs -0 rm -f 2>/dev/null
mv "$DO_BUILT.new" "$DO_BUILT"
DO_PATH=$DO_BUILT.dir
export DO_PATH="$DO_BUILT.dir"
export PATH="$DO_PATH:$PATH"
rm -rf "$DO_PATH"
mkdir "$DO_PATH"
@ -128,11 +129,23 @@ _endswith()
}
# Prints $1 if it's absolute, or $2/$1 if $1 is not absolute.
_abspath()
{
local here="$2" there="$1"
if _startswith "$1" "/"; then
echo "$1"
else
echo "$2/$1"
fi
}
# Prints $1 as a path relative to $PWD (not starting with /).
# If it already doesn't start with a /, doesn't change the string.
_relpath()
{
local here="$(command pwd)" there="$1" out= hadslash=
local here="$2" there="$1" out= hadslash=
#echo "RP start '$there' hs='$hadslash'" >&2
_startswith "$there" "/" || { echo "$there" && return; }
[ "$there" != "/" ] && _endswith "$there" "/" && hadslash=/
@ -158,12 +171,12 @@ _relpath()
# For example, a/b/../c will be reduced to just a/c.
_normpath()
(
local path="$1" out= isabs=
local path="$1" relto="$2" out= isabs=
#echo "NP start '$path'" >&2
if _startswith "$path" "/"; then
isabs=1
else
path="${PWD%/}/$path"
path="${relto%/}/$path"
fi
set -f
IFS=/
@ -180,7 +193,7 @@ _normpath()
if [ -n "$isabs" ]; then
echo "${out:-/}"
else
_relpath "${out:-/}"
_relpath "${out:-/}" "$relto"
fi
)
@ -222,7 +235,7 @@ _find_dofiles()
[ -n "$dodir" ] && dodir=${dodir%/}/
#echo "_find_dofiles: '$dodir' '$dofile'" >&2
_find_dofiles_pwd "$dodir" "$dofile" && return 0
newdir=$(_normpath "${dodir}..")
newdir=$(_normpath "${dodir}.." "$PWD")
[ "$newdir" = "$dodir" ] && break
dodir=$newdir
done
@ -257,25 +270,26 @@ _run_dofile()
cmd=${line1#"#!/"}
if [ "$cmd" != "$line1" ]; then
set -$_do_opt_verbose$_do_opt_exec
exec /$cmd "$PWD/$dofile" "$@" >"$tmp.tmp2"
exec /$cmd "$PWD/$dofile" "$@"
else
set -$_do_opt_verbose$_do_opt_exec
:; . "$PWD/$dofile" >"$tmp.tmp2"
:; . "$PWD/$dofile"
fi
}
# Find and run the right .do file, starting in dir $1, for target $2, using
# filename $3 as the temporary output file. Renames the temp file to $2 when
# Find and run the right .do file, starting in dir $1, for target $2,
# providing a temporary output file as $3. Renames the temp file to $2 when
# done.
_do()
{
local dir="$1" target="$2" tmp="$3" dopath= dodir= dofile= ext=
local dir="$1" target="$1$2" tmp="$1$2.redo.tmp"
local dopath= dodir= dofile= ext=
if [ "$_cmd" = "redo" ] ||
( [ ! -e "$target" -o -d "$target" ] &&
[ ! -e "$target.did" ] ); then
printf '%sdo %s%s%s%s\n' \
"$green" "$DO_DEPTH" "$bold" "$dir$target" "$plain" >&2
"$green" "$DO_DEPTH" "$bold" "$target" "$plain" >&2
dopath=$(_find_dofile "$target")
if [ ! -e "$dopath" ]; then
echo "do: $target: no .do file ($PWD)" >&2
@ -292,56 +306,60 @@ _do()
target=$PWD/$target
tmp=$PWD/$tmp
cd "$dodir" || return 99
target=$(_relpath "$target") || return 98
tmp=$(_relpath "$tmp") || return 97
target=$(_relpath "$target" "$PWD") || return 98
tmp=$(_relpath "$tmp" "$PWD") || return 97
base=${target%$ext}
[ ! -e "$DO_BUILT" ] || [ ! -d "$(dirname "$target")" ] ||
: >>"$target.did.tmp"
( _run_dofile "$target" "$base" "$tmp.tmp" )
# $qtmp is a temporary file used to capture stdout.
# Since it might be accidentally deleted as a .do file
# does its work, we create it, then open two fds to it,
# then immediately delete the name. We use one fd to
# redirect to stdout, and the other to read from after,
# because there's no way to fseek(fd, 0) in sh.
qtmp=$DO_PATH/do.$$.tmp
(
rm -f "$qtmp"
( _run_dofile "$target" "$base" "$tmp" >&3 3>&- 4<&- )
rv=$?
if [ $rv != 0 ]; then
printf "do: %s%s\n" "$DO_DEPTH" \
"$dir$target: got exit code $rv" >&2
"$target: got exit code $rv" >&2
rm -f "$tmp.tmp" "$tmp.tmp2" "$target.did"
return $rv
fi
echo "$PWD/$target" >>"$DO_BUILT"
mv "$tmp.tmp" "$target" 2>/dev/null ||
! test -s "$tmp.tmp2" ||
mv "$tmp.tmp2" "$target" 2>/dev/null
if [ ! -e "$tmp" ]; then
# if $3 wasn't created, copy from stdout file
cat <&4 >$tmp
# if that's zero length too, forget it
[ -s "$tmp" ] || rm -f "$tmp"
fi
) 3>$qtmp 4<$qtmp || return
mv "$tmp" "$target" 2>/dev/null
[ -e "$target.did.tmp" ] &&
mv "$target.did.tmp" "$target.did" ||
: >>"$target.did"
rm -f "$tmp.tmp2"
else
_debug "do $DO_DEPTH$target exists." >&2
fi
}
# Make corrections for directories that don't actually exist yet.
_dir_shovel()
{
local dir base
xdir=$1 xbase=$2 xbasetmp=$2
while [ ! -d "$xdir" -a -n "$xdir" ]; do
_dirsplit "${xdir%/}"
xbasetmp=${_dirsplit_base}__$xbasetmp
xdir=$_dirsplit_dir xbase=$_dirsplit_base/$xbase
done
_debug "xbasetmp='$xbasetmp'" >&2
}
# Implementation of the "redo" command.
_redo()
{
local i startdir="$PWD" dir base
set +e
for i in "$@"; do
i=$(_abspath "$i" "$startdir")
(
cd "$DO_STARTDIR" || return 99
i=$(_normpath "$(_relpath "$i" "$PWD")" "$PWD")
_dirsplit "$i"
_dir_shovel "$_dirsplit_dir" "$_dirsplit_base"
dir=$xdir base=$xbase basetmp=$xbasetmp
( cd "$dir" && _do "$dir" "$base" "$basetmp" )
dir=$_dirsplit_dir base=$_dirsplit_base
_do "$dir" "$base"
)
[ "$?" = 0 ] || return 1
done
}

View file

@ -102,31 +102,32 @@ check_s "" "$_dirsplit_base"
SECTION _relpath
check "a/b/c" _relpath "$PWD/a/b/c"
check "../a/b/c" _relpath "$PWD/../a/b/c"
check "" _relpath "$PWD"
(cd / && check "a/b/c" _relpath a/b/c)
(cd / && check "a/b/c" _relpath /a/b/c)
(cd / && check "" _relpath /)
(cd /usr/bin && check "../lib" _relpath /usr/lib)
(cd /usr/bin && check "../lib/" _relpath /usr/lib/)
(cd /usr/bin && check "../.." _relpath /)
(cd fakedir && check ".." _relpath ..)
(cd fakedir && check "../" _relpath ../)
(cd fakedir && check "../fakedir" _relpath ../fakedir)
x=$PWD/x
check "a/b/c" _relpath "$x/a/b/c" "$x"
check "../a/b/c" _relpath "$x/../a/b/c" "$x"
check "" _relpath "$x" "$x"
check "a/b/c" _relpath a/b/c "/"
check "a/b/c" _relpath /a/b/c "/"
check "" _relpath / "/"
check "../lib" _relpath /usr/lib "/usr/bin"
check "../lib/" _relpath /usr/lib/ "/usr/bin"
check "../.." _relpath / "/usr/bin"
check ".." _relpath .. "$PWD/fakedir"
check "../" _relpath ../ "$PWD/fakedir"
check "../fakedir" _relpath ../fakedir "$PWD/fakedir"
SECTION _normpath
check "/usr/lib" _normpath /usr/../usr/bin/../lib
check "/" _normpath /a/b/c/../../..
check "/" _normpath /
check "../a" _normpath ../a
(cd fakedir && check "../a/b" _normpath ../a/b)
(cd fakedir && check ".." _normpath ..)
(cd / && check "tuv" _normpath a/b/../../tuv)
(cd / && check "" _normpath a/b/../..)
check ".." _normpath ../
check ".." _normpath ..
check "/usr/lib" _normpath /usr/../usr/bin/../lib "$x"
check "/" _normpath /a/b/c/../../.. "$x"
check "/" _normpath / "$x"
check "../a" _normpath ../a "$x"
check "../a/b" _normpath ../a/b "$x/fakedir"
check ".." _normpath .. "$x/fakedir"
check "tuv" _normpath a/b/../../tuv "/"
check "" _normpath a/b/../.. "/"
check ".." _normpath ../ "$x"
check ".." _normpath .. "$x"
SECTION _find_dofile

View file

@ -1,5 +1,5 @@
"""Code for parallel-building a set of targets, if needed."""
import sys, os, errno, stat, signal, time
import errno, os, stat, signal, sys, tempfile, time
from . import cycles, env, jobserver, logs, state, paths
from .helpers import unlink, close_on_exec
from .logs import debug2, err, warn, meta
@ -120,20 +120,14 @@ class _BuildJob(object):
def __init__(self, t, sf, lock, shouldbuildfunc, donefunc):
self.t = t # original target name, not relative to env.v.BASE
self.sf = sf
tmpbase = t
while not os.path.isdir(os.path.dirname(tmpbase) or '.'):
ofs = tmpbase.rfind('/')
assert ofs >= 0
tmpbase = tmpbase[:ofs] + '__' + tmpbase[ofs+1:]
self.tmpname1 = '%s.redo1.tmp' % tmpbase
self.tmpname2 = '%s.redo2.tmp' % tmpbase
self.tmpname = None
self.lock = lock
self.shouldbuildfunc = shouldbuildfunc
self.donefunc = donefunc
self.before_t = _try_stat(self.t)
# attributes of the running process
self.f = None
self.outfile = None
def start(self):
"""Actually start running this job in a subproc, if needed."""
@ -195,11 +189,32 @@ class _BuildJob(object):
else:
err('no rule to redo %r\n' % t)
return self._finalize(1)
unlink(self.tmpname1)
unlink(self.tmpname2)
ffd = os.open(self.tmpname1, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666)
# There is no good place for us to pre-create a temp file for
# stdout. The target dir might not exist yet, or it might currently
# exist but get wiped by the .do script. Other dirs, like the one
# containing the .do file, might be mounted readonly. We can put it
# in the system temp dir, but then we can't necessarily rename it to
# the target filename because it might cross filesystem boundaries.
# Also, if redo is interrupted, it would leave a temp file lying
# around. To avoid all this, use mkstemp() to create a temp file
# wherever it wants to, and immediately unlink it, but keep a file
# handle open. When the .do script finishes, we can copy the
# content out of that nameless file handle into a file in the same
# dir as the target (which by definition must now exist, if you
# wanted the target to exist).
#
# On the other hand, the $3 temp filename can be hardcoded to be in
# the target directory, even if that directory does not exist.
# It's not *redo*'s job to create that file. The .do file will
# create it, if it wants, and it's the .do file's job to first ensure
# that the directory exists.
tmpbase = os.path.join(dodir, basename + ext)
self.tmpname = tmpbase + '.redo.tmp'
unlink(self.tmpname)
ffd, fname = tempfile.mkstemp(prefix='redo.', suffix='.tmp')
close_on_exec(ffd, True)
self.f = os.fdopen(ffd, 'w+')
os.unlink(fname)
self.outfile = os.fdopen(ffd, 'w+')
# this will run in the dofile's directory, so use only basenames here
arg1 = basename + ext # target name (including extension)
arg2 = basename # target name (without extension)
@ -207,8 +222,8 @@ class _BuildJob(object):
dofile,
arg1,
arg2,
# temp output file name
state.relpath(os.path.abspath(self.tmpname2), dodir),
# $3 temp output file name
state.relpath(os.path.abspath(self.tmpname), dodir),
]
if env.v.VERBOSE:
argv[1] += 'v'
@ -287,8 +302,8 @@ class _BuildJob(object):
cycles.add(self.lock.fid)
if dodir:
os.chdir(dodir)
os.dup2(self.f.fileno(), 1)
os.close(self.f.fileno())
os.dup2(self.outfile.fileno(), 1)
os.close(self.outfile.fileno())
close_on_exec(1, False)
if env.v.LOG:
cur_inode = str(os.fstat(2).st_ino)
@ -340,21 +355,12 @@ class _BuildJob(object):
(like writing directly to $1 instead of $3). We also have to record
the new file stamp data for the completed target.
"""
f = self.f
outfile = self.outfile
before_t = self.before_t
after_t = _try_stat(t)
st1 = os.fstat(f.fileno())
st1b = _try_stat(self.tmpname1)
st2 = _try_stat(self.tmpname2)
if not st1.st_size and not st1b:
warn("%s: %s deleted %s; don't do that.\n"
% (_nice(t), argv[2], self.tmpname1))
# not fatal, continue
if st1.st_size > 0 and not st1b:
err('%s: %s wrote to stdout but deleted %s.\n'
% (_nice(t), argv[2], self.tmpname1))
rv = 207
elif (after_t and
st1 = os.fstat(outfile.fileno())
st2 = _try_stat(self.tmpname)
if (after_t and
(not before_t or before_t.st_mtime != after_t.st_mtime) and
not stat.S_ISDIR(after_t.st_mode)):
err('%s modified %s directly!\n' % (argv[2], t))
@ -368,9 +374,11 @@ class _BuildJob(object):
# FIXME: race condition here between updating stamp/is_generated
# and actually renaming the files into place. There needs to
# be some kind of two-stage commit, I guess.
if st2:
if st1.st_size > 0:
# script wrote to stdout. Copy its contents to the tmpfile.
unlink(self.tmpname)
try:
os.rename(self.tmpname2, t)
newf = open(self.tmpname, 'w')
except OSError, e:
dnt = os.path.dirname(t)
if not os.path.exists(dnt):
@ -380,23 +388,32 @@ class _BuildJob(object):
else:
# I don't know why this would happen, so raise the
# full exception if it ever does.
err('%s: rename %s: %s\n' % (t, self.tmpname2, e))
err('%s: save stdout to %s: %s\n'
% (t, self.tmpname, e))
raise
unlink(self.tmpname1)
elif st1.st_size > 0:
try:
os.rename(self.tmpname1, t)
except OSError, e:
if e.errno == errno.ENOENT:
unlink(t)
else:
err('%s: can\'t save stdout to %r: %s\n' %
(argv[2], t, e.strerror))
rv = 1000
self.outfile.seek(0)
while 1:
b = self.outfile.read(1024*1024)
if not b:
break
newf.write(b)
newf.close()
st2 = _try_stat(self.tmpname)
if st2:
os.unlink(self.tmpname2)
# either $3 file was created *or* stdout was written to.
# therefore tmpfile now exists.
try:
# Atomically replace the target file
os.rename(self.tmpname, t)
except OSError, e:
# Nowadays self.tmpname is in the same directory as
# t, so there's no very good reason for this to
# fail. Thus, raise the full exception if it ever
# does.
err('%s: rename %s: %s\n' % (t, self.tmpname, e))
raise
else: # no output generated at all; that's ok
unlink(self.tmpname1)
unlink(t)
sf = self.sf
sf.refresh()
@ -412,13 +429,12 @@ class _BuildJob(object):
sf.update_stamp()
sf.set_changed()
else:
unlink(self.tmpname1)
unlink(self.tmpname2)
unlink(self.tmpname)
sf = self.sf
sf.set_failed()
sf.zap_deps2()
sf.save()
f.close()
outfile.close()
meta('done', '%d %s' % (rv, state.target_relpath(self.t)))
return rv

View file

@ -1,2 +1,3 @@
deltest2
/destruct
/x

View file

@ -1 +1 @@
redo deltest deltest2 deltest3
redo deltest deltest2 deltest3 deltest4

View file

@ -1,2 +1,2 @@
rm -rf destruct
rm -rf destruct x
rm -f deltest2 *~ .*~

View file

@ -0,0 +1,3 @@
rm -rf x
mkdir x
echo 'stdout'

View file

@ -0,0 +1,4 @@
rm -rf x
mkdir x
echo 'redir' >$3

View file

@ -10,7 +10,7 @@ cat >destruct2.do <<-EOF
echo 'stdout'
EOF
# deleting unused stdout file is a warning only
# deleting unused stdout file is a warning at most
redo destruct1 2>destruct1.log || exit 11
[ "$(cat destruct1)" = "redir" ] || exit 12

15
t/202-del/deltest4.do Normal file
View file

@ -0,0 +1,15 @@
rm -rf x
redo x/a.spam2
[ "$(cat x/a.spam2)" = "redir" ] || exit 11
redo x/a.spam2
[ "$(cat x/a.spam2)" = "redir" ] || exit 12
redo x/b.spam2
[ "$(cat x/b.spam2)" = "redir" ] || exit 13
rm -rf x
redo x/a.spam1
[ "$(cat x/a.spam1)" = "stdout" ] || exit 21
redo x/a.spam1
[ "$(cat x/a.spam1)" = "stdout" ] || exit 22
redo x/b.spam1
[ "$(cat x/b.spam1)" = "stdout" ] || exit 23