From d95277d12173c0128624de9d85bc2aca35abd98c Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 12 Dec 2018 03:37:44 +0000 Subject: [PATCH] Use mkstemp() to create the stdout temp file, and simplify $3 path. Previously, we'd try to put the stdout temp file in the same dir as the target, if that dir exists. Otherwise we'd walk up the directory tree looking for a good place. But this would go wrong if the directory we chose got *deleted* during the run of the .do file. Instead, we switch to an entirely new design: we use mkstemp() to generate a temp file in the standard temp file location (probably /tmp), then open it and immediately delete it, so the .do file can't cause any unexpected behaviour. After the .do file exits, we use our still-open fd to the stdout file to read the content back out. In the old implementation, we also put the $3 in the "adjusted" location that depended whether the target dir already existed, just for consistency. But that was never necessary: we didn't create the $3 file, and if the .do script wants to write to $3, it should create the target dir first anyway. So change it to *always* use a $3 temp filename in the target dir, which is much simpler and so has fewer edge cases. Add t/202-del/deltest4 with some tests for all these edge cases. Reported-by: Jeff Stearns --- minimal/do | 110 +++++++++++++++++++++--------------- minimal/do.test | 45 +++++++-------- redo/builder.py | 112 +++++++++++++++++++++---------------- t/202-del/.gitignore | 1 + t/202-del/all.do | 2 +- t/202-del/clean.do | 2 +- t/202-del/default.spam1.do | 3 + t/202-del/default.spam2.do | 4 ++ t/202-del/deltest3.do | 2 +- t/202-del/deltest4.do | 15 +++++ 10 files changed, 177 insertions(+), 119 deletions(-) create mode 100644 t/202-del/default.spam1.do create mode 100644 t/202-del/default.spam2.do create mode 100644 t/202-del/deltest4.do diff --git a/minimal/do b/minimal/do index 8444c21..61c05d6 100755 --- a/minimal/do +++ b/minimal/do @@ -4,7 +4,7 @@ # For the full version, visit http://github.com/apenwarr/redo # # The author disclaims copyright to this source file and hereby places it in -# the public domain. (2010 12 14; updated 2018 12 11) +# the public domain. (2010 12 14; updated 2018 12 13) # USAGE=" usage: do [-d] [-x] [-v] [-c] @@ -77,6 +77,7 @@ if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then if [ "$#" -eq 0 ] && [ "$_cmd" = "do" -o "$_cmd" = "redo" ]; then set all # only toplevel redo has a default target fi + export DO_STARTDIR="$PWD" export DO_BUILT="$PWD/.do_built" if [ -z "$_do_opt_clean" -a -e "$DO_BUILT" ]; then echo "do: Incremental mode. Use -c for clean rebuild." >&2 @@ -89,7 +90,7 @@ if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then done <"$DO_BUILT.new" | xargs -0 rm -f 2>/dev/null mv "$DO_BUILT.new" "$DO_BUILT" - DO_PATH=$DO_BUILT.dir + export DO_PATH="$DO_BUILT.dir" export PATH="$DO_PATH:$PATH" rm -rf "$DO_PATH" mkdir "$DO_PATH" @@ -128,11 +129,23 @@ _endswith() } +# Prints $1 if it's absolute, or $2/$1 if $1 is not absolute. +_abspath() +{ + local here="$2" there="$1" + if _startswith "$1" "/"; then + echo "$1" + else + echo "$2/$1" + fi +} + + # Prints $1 as a path relative to $PWD (not starting with /). # If it already doesn't start with a /, doesn't change the string. _relpath() { - local here="$(command pwd)" there="$1" out= hadslash= + local here="$2" there="$1" out= hadslash= #echo "RP start '$there' hs='$hadslash'" >&2 _startswith "$there" "/" || { echo "$there" && return; } [ "$there" != "/" ] && _endswith "$there" "/" && hadslash=/ @@ -158,12 +171,12 @@ _relpath() # For example, a/b/../c will be reduced to just a/c. _normpath() ( - local path="$1" out= isabs= + local path="$1" relto="$2" out= isabs= #echo "NP start '$path'" >&2 if _startswith "$path" "/"; then isabs=1 else - path="${PWD%/}/$path" + path="${relto%/}/$path" fi set -f IFS=/ @@ -180,7 +193,7 @@ _normpath() if [ -n "$isabs" ]; then echo "${out:-/}" else - _relpath "${out:-/}" + _relpath "${out:-/}" "$relto" fi ) @@ -222,7 +235,7 @@ _find_dofiles() [ -n "$dodir" ] && dodir=${dodir%/}/ #echo "_find_dofiles: '$dodir' '$dofile'" >&2 _find_dofiles_pwd "$dodir" "$dofile" && return 0 - newdir=$(_normpath "${dodir}..") + newdir=$(_normpath "${dodir}.." "$PWD") [ "$newdir" = "$dodir" ] && break dodir=$newdir done @@ -257,25 +270,26 @@ _run_dofile() cmd=${line1#"#!/"} if [ "$cmd" != "$line1" ]; then set -$_do_opt_verbose$_do_opt_exec - exec /$cmd "$PWD/$dofile" "$@" >"$tmp.tmp2" + exec /$cmd "$PWD/$dofile" "$@" else set -$_do_opt_verbose$_do_opt_exec - :; . "$PWD/$dofile" >"$tmp.tmp2" + :; . "$PWD/$dofile" fi } -# Find and run the right .do file, starting in dir $1, for target $2, using -# filename $3 as the temporary output file. Renames the temp file to $2 when +# Find and run the right .do file, starting in dir $1, for target $2, +# providing a temporary output file as $3. Renames the temp file to $2 when # done. _do() { - local dir="$1" target="$2" tmp="$3" dopath= dodir= dofile= ext= + local dir="$1" target="$1$2" tmp="$1$2.redo.tmp" + local dopath= dodir= dofile= ext= if [ "$_cmd" = "redo" ] || ( [ ! -e "$target" -o -d "$target" ] && [ ! -e "$target.did" ] ); then printf '%sdo %s%s%s%s\n' \ - "$green" "$DO_DEPTH" "$bold" "$dir$target" "$plain" >&2 + "$green" "$DO_DEPTH" "$bold" "$target" "$plain" >&2 dopath=$(_find_dofile "$target") if [ ! -e "$dopath" ]; then echo "do: $target: no .do file ($PWD)" >&2 @@ -292,56 +306,60 @@ _do() target=$PWD/$target tmp=$PWD/$tmp cd "$dodir" || return 99 - target=$(_relpath "$target") || return 98 - tmp=$(_relpath "$tmp") || return 97 + target=$(_relpath "$target" "$PWD") || return 98 + tmp=$(_relpath "$tmp" "$PWD") || return 97 base=${target%$ext} [ ! -e "$DO_BUILT" ] || [ ! -d "$(dirname "$target")" ] || : >>"$target.did.tmp" - ( _run_dofile "$target" "$base" "$tmp.tmp" ) - rv=$? - if [ $rv != 0 ]; then - printf "do: %s%s\n" "$DO_DEPTH" \ - "$dir$target: got exit code $rv" >&2 - rm -f "$tmp.tmp" "$tmp.tmp2" "$target.did" - return $rv - fi - echo "$PWD/$target" >>"$DO_BUILT" - mv "$tmp.tmp" "$target" 2>/dev/null || - ! test -s "$tmp.tmp2" || - mv "$tmp.tmp2" "$target" 2>/dev/null + # $qtmp is a temporary file used to capture stdout. + # Since it might be accidentally deleted as a .do file + # does its work, we create it, then open two fds to it, + # then immediately delete the name. We use one fd to + # redirect to stdout, and the other to read from after, + # because there's no way to fseek(fd, 0) in sh. + qtmp=$DO_PATH/do.$$.tmp + ( + rm -f "$qtmp" + ( _run_dofile "$target" "$base" "$tmp" >&3 3>&- 4<&- ) + rv=$? + if [ $rv != 0 ]; then + printf "do: %s%s\n" "$DO_DEPTH" \ + "$target: got exit code $rv" >&2 + rm -f "$tmp.tmp" "$tmp.tmp2" "$target.did" + return $rv + fi + echo "$PWD/$target" >>"$DO_BUILT" + if [ ! -e "$tmp" ]; then + # if $3 wasn't created, copy from stdout file + cat <&4 >$tmp + # if that's zero length too, forget it + [ -s "$tmp" ] || rm -f "$tmp" + fi + ) 3>$qtmp 4<$qtmp || return + mv "$tmp" "$target" 2>/dev/null [ -e "$target.did.tmp" ] && mv "$target.did.tmp" "$target.did" || : >>"$target.did" - rm -f "$tmp.tmp2" else _debug "do $DO_DEPTH$target exists." >&2 fi } -# Make corrections for directories that don't actually exist yet. -_dir_shovel() -{ - local dir base - xdir=$1 xbase=$2 xbasetmp=$2 - while [ ! -d "$xdir" -a -n "$xdir" ]; do - _dirsplit "${xdir%/}" - xbasetmp=${_dirsplit_base}__$xbasetmp - xdir=$_dirsplit_dir xbase=$_dirsplit_base/$xbase - done - _debug "xbasetmp='$xbasetmp'" >&2 -} - - # Implementation of the "redo" command. _redo() { + local i startdir="$PWD" dir base set +e for i in "$@"; do - _dirsplit "$i" - _dir_shovel "$_dirsplit_dir" "$_dirsplit_base" - dir=$xdir base=$xbase basetmp=$xbasetmp - ( cd "$dir" && _do "$dir" "$base" "$basetmp" ) + i=$(_abspath "$i" "$startdir") + ( + cd "$DO_STARTDIR" || return 99 + i=$(_normpath "$(_relpath "$i" "$PWD")" "$PWD") + _dirsplit "$i" + dir=$_dirsplit_dir base=$_dirsplit_base + _do "$dir" "$base" + ) [ "$?" = 0 ] || return 1 done } diff --git a/minimal/do.test b/minimal/do.test index 292e8ef..04fed5f 100755 --- a/minimal/do.test +++ b/minimal/do.test @@ -102,31 +102,32 @@ check_s "" "$_dirsplit_base" SECTION _relpath -check "a/b/c" _relpath "$PWD/a/b/c" -check "../a/b/c" _relpath "$PWD/../a/b/c" -check "" _relpath "$PWD" -(cd / && check "a/b/c" _relpath a/b/c) -(cd / && check "a/b/c" _relpath /a/b/c) -(cd / && check "" _relpath /) -(cd /usr/bin && check "../lib" _relpath /usr/lib) -(cd /usr/bin && check "../lib/" _relpath /usr/lib/) -(cd /usr/bin && check "../.." _relpath /) -(cd fakedir && check ".." _relpath ..) -(cd fakedir && check "../" _relpath ../) -(cd fakedir && check "../fakedir" _relpath ../fakedir) +x=$PWD/x +check "a/b/c" _relpath "$x/a/b/c" "$x" +check "../a/b/c" _relpath "$x/../a/b/c" "$x" +check "" _relpath "$x" "$x" +check "a/b/c" _relpath a/b/c "/" +check "a/b/c" _relpath /a/b/c "/" +check "" _relpath / "/" +check "../lib" _relpath /usr/lib "/usr/bin" +check "../lib/" _relpath /usr/lib/ "/usr/bin" +check "../.." _relpath / "/usr/bin" +check ".." _relpath .. "$PWD/fakedir" +check "../" _relpath ../ "$PWD/fakedir" +check "../fakedir" _relpath ../fakedir "$PWD/fakedir" SECTION _normpath -check "/usr/lib" _normpath /usr/../usr/bin/../lib -check "/" _normpath /a/b/c/../../.. -check "/" _normpath / -check "../a" _normpath ../a -(cd fakedir && check "../a/b" _normpath ../a/b) -(cd fakedir && check ".." _normpath ..) -(cd / && check "tuv" _normpath a/b/../../tuv) -(cd / && check "" _normpath a/b/../..) -check ".." _normpath ../ -check ".." _normpath .. +check "/usr/lib" _normpath /usr/../usr/bin/../lib "$x" +check "/" _normpath /a/b/c/../../.. "$x" +check "/" _normpath / "$x" +check "../a" _normpath ../a "$x" +check "../a/b" _normpath ../a/b "$x/fakedir" +check ".." _normpath .. "$x/fakedir" +check "tuv" _normpath a/b/../../tuv "/" +check "" _normpath a/b/../.. "/" +check ".." _normpath ../ "$x" +check ".." _normpath .. "$x" SECTION _find_dofile diff --git a/redo/builder.py b/redo/builder.py index eca8335..8cd66c5 100644 --- a/redo/builder.py +++ b/redo/builder.py @@ -1,5 +1,5 @@ """Code for parallel-building a set of targets, if needed.""" -import sys, os, errno, stat, signal, time +import errno, os, stat, signal, sys, tempfile, time from . import cycles, env, jobserver, logs, state, paths from .helpers import unlink, close_on_exec from .logs import debug2, err, warn, meta @@ -120,20 +120,14 @@ class _BuildJob(object): def __init__(self, t, sf, lock, shouldbuildfunc, donefunc): self.t = t # original target name, not relative to env.v.BASE self.sf = sf - tmpbase = t - while not os.path.isdir(os.path.dirname(tmpbase) or '.'): - ofs = tmpbase.rfind('/') - assert ofs >= 0 - tmpbase = tmpbase[:ofs] + '__' + tmpbase[ofs+1:] - self.tmpname1 = '%s.redo1.tmp' % tmpbase - self.tmpname2 = '%s.redo2.tmp' % tmpbase + self.tmpname = None self.lock = lock self.shouldbuildfunc = shouldbuildfunc self.donefunc = donefunc self.before_t = _try_stat(self.t) # attributes of the running process - self.f = None + self.outfile = None def start(self): """Actually start running this job in a subproc, if needed.""" @@ -195,11 +189,32 @@ class _BuildJob(object): else: err('no rule to redo %r\n' % t) return self._finalize(1) - unlink(self.tmpname1) - unlink(self.tmpname2) - ffd = os.open(self.tmpname1, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666) + # There is no good place for us to pre-create a temp file for + # stdout. The target dir might not exist yet, or it might currently + # exist but get wiped by the .do script. Other dirs, like the one + # containing the .do file, might be mounted readonly. We can put it + # in the system temp dir, but then we can't necessarily rename it to + # the target filename because it might cross filesystem boundaries. + # Also, if redo is interrupted, it would leave a temp file lying + # around. To avoid all this, use mkstemp() to create a temp file + # wherever it wants to, and immediately unlink it, but keep a file + # handle open. When the .do script finishes, we can copy the + # content out of that nameless file handle into a file in the same + # dir as the target (which by definition must now exist, if you + # wanted the target to exist). + # + # On the other hand, the $3 temp filename can be hardcoded to be in + # the target directory, even if that directory does not exist. + # It's not *redo*'s job to create that file. The .do file will + # create it, if it wants, and it's the .do file's job to first ensure + # that the directory exists. + tmpbase = os.path.join(dodir, basename + ext) + self.tmpname = tmpbase + '.redo.tmp' + unlink(self.tmpname) + ffd, fname = tempfile.mkstemp(prefix='redo.', suffix='.tmp') close_on_exec(ffd, True) - self.f = os.fdopen(ffd, 'w+') + os.unlink(fname) + self.outfile = os.fdopen(ffd, 'w+') # this will run in the dofile's directory, so use only basenames here arg1 = basename + ext # target name (including extension) arg2 = basename # target name (without extension) @@ -207,8 +222,8 @@ class _BuildJob(object): dofile, arg1, arg2, - # temp output file name - state.relpath(os.path.abspath(self.tmpname2), dodir), + # $3 temp output file name + state.relpath(os.path.abspath(self.tmpname), dodir), ] if env.v.VERBOSE: argv[1] += 'v' @@ -287,8 +302,8 @@ class _BuildJob(object): cycles.add(self.lock.fid) if dodir: os.chdir(dodir) - os.dup2(self.f.fileno(), 1) - os.close(self.f.fileno()) + os.dup2(self.outfile.fileno(), 1) + os.close(self.outfile.fileno()) close_on_exec(1, False) if env.v.LOG: cur_inode = str(os.fstat(2).st_ino) @@ -340,21 +355,12 @@ class _BuildJob(object): (like writing directly to $1 instead of $3). We also have to record the new file stamp data for the completed target. """ - f = self.f + outfile = self.outfile before_t = self.before_t after_t = _try_stat(t) - st1 = os.fstat(f.fileno()) - st1b = _try_stat(self.tmpname1) - st2 = _try_stat(self.tmpname2) - if not st1.st_size and not st1b: - warn("%s: %s deleted %s; don't do that.\n" - % (_nice(t), argv[2], self.tmpname1)) - # not fatal, continue - if st1.st_size > 0 and not st1b: - err('%s: %s wrote to stdout but deleted %s.\n' - % (_nice(t), argv[2], self.tmpname1)) - rv = 207 - elif (after_t and + st1 = os.fstat(outfile.fileno()) + st2 = _try_stat(self.tmpname) + if (after_t and (not before_t or before_t.st_mtime != after_t.st_mtime) and not stat.S_ISDIR(after_t.st_mode)): err('%s modified %s directly!\n' % (argv[2], t)) @@ -368,9 +374,11 @@ class _BuildJob(object): # FIXME: race condition here between updating stamp/is_generated # and actually renaming the files into place. There needs to # be some kind of two-stage commit, I guess. - if st2: + if st1.st_size > 0: + # script wrote to stdout. Copy its contents to the tmpfile. + unlink(self.tmpname) try: - os.rename(self.tmpname2, t) + newf = open(self.tmpname, 'w') except OSError, e: dnt = os.path.dirname(t) if not os.path.exists(dnt): @@ -380,23 +388,32 @@ class _BuildJob(object): else: # I don't know why this would happen, so raise the # full exception if it ever does. - err('%s: rename %s: %s\n' % (t, self.tmpname2, e)) + err('%s: save stdout to %s: %s\n' + % (t, self.tmpname, e)) raise - unlink(self.tmpname1) - elif st1.st_size > 0: + else: + self.outfile.seek(0) + while 1: + b = self.outfile.read(1024*1024) + if not b: + break + newf.write(b) + newf.close() + st2 = _try_stat(self.tmpname) + if st2: + # either $3 file was created *or* stdout was written to. + # therefore tmpfile now exists. try: - os.rename(self.tmpname1, t) + # Atomically replace the target file + os.rename(self.tmpname, t) except OSError, e: - if e.errno == errno.ENOENT: - unlink(t) - else: - err('%s: can\'t save stdout to %r: %s\n' % - (argv[2], t, e.strerror)) - rv = 1000 - if st2: - os.unlink(self.tmpname2) + # Nowadays self.tmpname is in the same directory as + # t, so there's no very good reason for this to + # fail. Thus, raise the full exception if it ever + # does. + err('%s: rename %s: %s\n' % (t, self.tmpname, e)) + raise else: # no output generated at all; that's ok - unlink(self.tmpname1) unlink(t) sf = self.sf sf.refresh() @@ -412,13 +429,12 @@ class _BuildJob(object): sf.update_stamp() sf.set_changed() else: - unlink(self.tmpname1) - unlink(self.tmpname2) + unlink(self.tmpname) sf = self.sf sf.set_failed() sf.zap_deps2() sf.save() - f.close() + outfile.close() meta('done', '%d %s' % (rv, state.target_relpath(self.t))) return rv diff --git a/t/202-del/.gitignore b/t/202-del/.gitignore index 0b5079e..9b97e18 100644 --- a/t/202-del/.gitignore +++ b/t/202-del/.gitignore @@ -1,2 +1,3 @@ deltest2 /destruct +/x diff --git a/t/202-del/all.do b/t/202-del/all.do index dcbcd15..6b553bf 100644 --- a/t/202-del/all.do +++ b/t/202-del/all.do @@ -1 +1 @@ -redo deltest deltest2 deltest3 +redo deltest deltest2 deltest3 deltest4 diff --git a/t/202-del/clean.do b/t/202-del/clean.do index e2cf539..b17983a 100644 --- a/t/202-del/clean.do +++ b/t/202-del/clean.do @@ -1,2 +1,2 @@ -rm -rf destruct +rm -rf destruct x rm -f deltest2 *~ .*~ diff --git a/t/202-del/default.spam1.do b/t/202-del/default.spam1.do new file mode 100644 index 0000000..55e08c6 --- /dev/null +++ b/t/202-del/default.spam1.do @@ -0,0 +1,3 @@ +rm -rf x +mkdir x +echo 'stdout' diff --git a/t/202-del/default.spam2.do b/t/202-del/default.spam2.do new file mode 100644 index 0000000..715426e --- /dev/null +++ b/t/202-del/default.spam2.do @@ -0,0 +1,4 @@ +rm -rf x +mkdir x +echo 'redir' >$3 + diff --git a/t/202-del/deltest3.do b/t/202-del/deltest3.do index d1ba8a7..c1ad79a 100644 --- a/t/202-del/deltest3.do +++ b/t/202-del/deltest3.do @@ -10,7 +10,7 @@ cat >destruct2.do <<-EOF echo 'stdout' EOF -# deleting unused stdout file is a warning only +# deleting unused stdout file is a warning at most redo destruct1 2>destruct1.log || exit 11 [ "$(cat destruct1)" = "redir" ] || exit 12 diff --git a/t/202-del/deltest4.do b/t/202-del/deltest4.do new file mode 100644 index 0000000..d3fc3d1 --- /dev/null +++ b/t/202-del/deltest4.do @@ -0,0 +1,15 @@ +rm -rf x +redo x/a.spam2 +[ "$(cat x/a.spam2)" = "redir" ] || exit 11 +redo x/a.spam2 +[ "$(cat x/a.spam2)" = "redir" ] || exit 12 +redo x/b.spam2 +[ "$(cat x/b.spam2)" = "redir" ] || exit 13 + +rm -rf x +redo x/a.spam1 +[ "$(cat x/a.spam1)" = "stdout" ] || exit 21 +redo x/a.spam1 +[ "$(cat x/a.spam1)" = "stdout" ] || exit 22 +redo x/b.spam1 +[ "$(cat x/b.spam1)" = "stdout" ] || exit 23