Use mkstemp() to create the stdout temp file, and simplify $3 path.

Previously, we'd try to put the stdout temp file in the same dir as the target, if that dir exists. Otherwise we'd walk up the directory tree looking for a good place. But this would go wrong if the directory we chose got *deleted* during the run of the .do file. Instead, we switch to an entirely new design: we use mkstemp() to generate a temp file in the standard temp file location (probably /tmp), then open it and immediately delete it, so the .do file can't cause any unexpected behaviour. After the .do file exits, we use our still-open fd to the stdout file to read the content back out. In the old implementation, we also put the $3 in the "adjusted" location that depended whether the target dir already existed, just for consistency. But that was never necessary: we didn't create the $3 file, and if the .do script wants to write to $3, it should create the target dir first anyway. So change it to *always* use a $3 temp filename in the target dir, which is much simpler and so has fewer edge cases. Add t/202-del/deltest4 with some tests for all these edge cases. Reported-by: Jeff Stearns <jeff.stearns@gmail.com>
2018-12-12 03:37:44 +00:00 · 2018-12-12 03:37:44 +00:00 · d95277d121
commit d95277d121
parent 1f79bf1174
10 changed files with 177 additions and 119 deletions
--- a/minimal/do
+++ b/minimal/do
@ -4,7 +4,7 @@
 # For the full version, visit http://github.com/apenwarr/redo
 #
 # The author disclaims copyright to this source file and hereby places it in
-# the public domain. (2010 12 14; updated 2018 12 11)
+# the public domain. (2010 12 14; updated 2018 12 13)
 #
 USAGE="
 usage: do [-d] [-x] [-v] [-c] <targets...>
@ -77,6 +77,7 @@ if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then
 	if [ "$#" -eq 0 ] && [ "$_cmd" = "do" -o "$_cmd" = "redo" ]; then
 		set all  # only toplevel redo has a default target
 	fi
+	export DO_STARTDIR="$PWD"
 	export DO_BUILT="$PWD/.do_built"
 	if [ -z "$_do_opt_clean" -a -e "$DO_BUILT" ]; then
 		echo "do: Incremental mode. Use -c for clean rebuild." >&2
@ -89,7 +90,7 @@ if [ -z "$DO_BUILT" -a "$_cmd" != "redo-whichdo" ]; then
 	done <"$DO_BUILT.new" |
 	xargs -0 rm -f 2>/dev/null
 	mv "$DO_BUILT.new" "$DO_BUILT"
-	DO_PATH=$DO_BUILT.dir
+	export DO_PATH="$DO_BUILT.dir"
 	export PATH="$DO_PATH:$PATH"
 	rm -rf "$DO_PATH"
 	mkdir "$DO_PATH"
@ -128,11 +129,23 @@ _endswith()
 }


+# Prints $1 if it's absolute, or $2/$1 if $1 is not absolute.
+_abspath()
+{
+	local here="$2" there="$1"
+	if _startswith "$1" "/"; then
+		echo "$1"
+	else
+		echo "$2/$1"
+	fi
+}
+
+
 # Prints $1 as a path relative to $PWD (not starting with /).
 # If it already doesn't start with a /, doesn't change the string.
 _relpath()
 {
-	local here="$(command pwd)" there="$1" out= hadslash=
+	local here="$2" there="$1" out= hadslash=
 	#echo "RP start '$there' hs='$hadslash'" >&2
 	_startswith "$there" "/" || { echo "$there" && return; }
 	[ "$there" != "/" ] && _endswith "$there" "/" && hadslash=/
@ -158,12 +171,12 @@ _relpath()
 # For example, a/b/../c will be reduced to just a/c.
 _normpath()
 (
-	local path="$1" out= isabs=
+	local path="$1" relto="$2" out= isabs=
 	#echo "NP start '$path'" >&2
 	if _startswith "$path" "/"; then
 		isabs=1
 	else
-		path="${PWD%/}/$path"
+		path="${relto%/}/$path"
 	fi
 	set -f
 	IFS=/
@ -180,7 +193,7 @@ _normpath()
 	if [ -n "$isabs" ]; then
 		echo "${out:-/}"
 	else
-		_relpath "${out:-/}"
+		_relpath "${out:-/}" "$relto"
 	fi
 )

@ -222,7 +235,7 @@ _find_dofiles()
 		[ -n "$dodir" ] && dodir=${dodir%/}/
 		#echo "_find_dofiles: '$dodir' '$dofile'" >&2
 		_find_dofiles_pwd "$dodir" "$dofile" && return 0
-		newdir=$(_normpath "${dodir}..")
+		newdir=$(_normpath "${dodir}.." "$PWD")
 		[ "$newdir" = "$dodir" ] && break
 		dodir=$newdir
 	done
@ -257,25 +270,26 @@ _run_dofile()
 	cmd=${line1#"#!/"}
 	if [ "$cmd" != "$line1" ]; then
 		set -$_do_opt_verbose$_do_opt_exec
-		exec /$cmd "$PWD/$dofile" "$@" >"$tmp.tmp2"
+		exec /$cmd "$PWD/$dofile" "$@"
 	else
 		set -$_do_opt_verbose$_do_opt_exec
-		:; . "$PWD/$dofile" >"$tmp.tmp2"
+		:; . "$PWD/$dofile"
 	fi
 }


-# Find and run the right .do file, starting in dir $1, for target $2, using
-# filename $3 as the temporary output file.  Renames the temp file to $2 when
+# Find and run the right .do file, starting in dir $1, for target $2,
+# providing a temporary output file as $3.  Renames the temp file to $2 when
 # done.
 _do()
 {
-	local dir="$1" target="$2" tmp="$3" dopath= dodir= dofile= ext=
+	local dir="$1" target="$1$2" tmp="$1$2.redo.tmp"
+	local dopath= dodir= dofile= ext=
 	if [ "$_cmd" = "redo" ] ||
 	    ( [ ! -e "$target" -o -d "$target" ] &&
 	      [ ! -e "$target.did" ] ); then
 		printf '%sdo  %s%s%s%s\n' \
-			"$green" "$DO_DEPTH" "$bold" "$dir$target" "$plain" >&2
+			"$green" "$DO_DEPTH" "$bold" "$target" "$plain" >&2
 		dopath=$(_find_dofile "$target")
 		if [ ! -e "$dopath" ]; then
 			echo "do: $target: no .do file ($PWD)" >&2
@ -292,56 +306,60 @@ _do()
 		target=$PWD/$target
 		tmp=$PWD/$tmp
 		cd "$dodir" || return 99
-		target=$(_relpath "$target") || return 98
-		tmp=$(_relpath "$tmp") || return 97
+		target=$(_relpath "$target" "$PWD") || return 98
+		tmp=$(_relpath "$tmp" "$PWD") || return 97
 		base=${target%$ext}
 		[ ! -e "$DO_BUILT" ] || [ ! -d "$(dirname "$target")" ] ||
 		: >>"$target.did.tmp"
-		( _run_dofile "$target" "$base" "$tmp.tmp" )
+		# $qtmp is a temporary file used to capture stdout.
+		# Since it might be accidentally deleted as a .do file
+		# does its work, we create it, then open two fds to it,
+		# then immediately delete the name.  We use one fd to
+		# redirect to stdout, and the other to read from after,
+		# because there's no way to fseek(fd, 0) in sh.
+		qtmp=$DO_PATH/do.$$.tmp
+		(
+			rm -f "$qtmp"
+			( _run_dofile "$target" "$base" "$tmp" >&3 3>&- 4<&- )
 			rv=$?
 			if [ $rv != 0 ]; then
 				printf "do: %s%s\n" "$DO_DEPTH" \
-				"$dir$target: got exit code $rv" >&2
+					"$target: got exit code $rv" >&2
 				rm -f "$tmp.tmp" "$tmp.tmp2" "$target.did"
 				return $rv
 			fi
 			echo "$PWD/$target" >>"$DO_BUILT"
-		mv "$tmp.tmp" "$target" 2>/dev/null ||
-		! test -s "$tmp.tmp2" ||
-		mv "$tmp.tmp2" "$target" 2>/dev/null
+			if [ ! -e "$tmp" ]; then
+				# if $3 wasn't created, copy from stdout file
+				cat <&4 >$tmp
+				# if that's zero length too, forget it
+				[ -s "$tmp" ] || rm -f "$tmp"
+			fi
+		) 3>$qtmp 4<$qtmp || return
+		mv "$tmp" "$target" 2>/dev/null
 		[ -e "$target.did.tmp" ] &&
 		mv "$target.did.tmp" "$target.did" ||
 		: >>"$target.did"
-		rm -f "$tmp.tmp2"
 	else
 		_debug "do  $DO_DEPTH$target exists." >&2
 	fi
 }


-# Make corrections for directories that don't actually exist yet.
-_dir_shovel()
-{
-	local dir base
-	xdir=$1 xbase=$2 xbasetmp=$2
-	while [ ! -d "$xdir" -a -n "$xdir" ]; do
-		_dirsplit "${xdir%/}"
-		xbasetmp=${_dirsplit_base}__$xbasetmp
-		xdir=$_dirsplit_dir xbase=$_dirsplit_base/$xbase
-	done
-	_debug "xbasetmp='$xbasetmp'" >&2
-}
-
-
 # Implementation of the "redo" command.
 _redo()
 {
+	local i startdir="$PWD" dir base
 	set +e
 	for i in "$@"; do
+		i=$(_abspath "$i" "$startdir")
+		(
+			cd "$DO_STARTDIR" || return 99
+			i=$(_normpath "$(_relpath "$i" "$PWD")" "$PWD")
 			_dirsplit "$i"
-		_dir_shovel "$_dirsplit_dir" "$_dirsplit_base"
-		dir=$xdir base=$xbase basetmp=$xbasetmp
-		( cd "$dir" && _do "$dir" "$base" "$basetmp" )
+			dir=$_dirsplit_dir base=$_dirsplit_base
+			_do "$dir" "$base"
+		)
 		[ "$?" = 0 ] || return 1
 	done
 }
--- a/minimal/do.test
+++ b/minimal/do.test
@ -102,31 +102,32 @@ check_s "" "$_dirsplit_base"


 SECTION _relpath
-check "a/b/c"  _relpath "$PWD/a/b/c"
-check "../a/b/c"  _relpath "$PWD/../a/b/c"
-check ""  _relpath "$PWD"
-(cd / && check "a/b/c"  _relpath a/b/c)
-(cd / && check "a/b/c"  _relpath /a/b/c)
-(cd / && check ""  _relpath /)
-(cd /usr/bin && check "../lib"  _relpath /usr/lib)
-(cd /usr/bin && check "../lib/"  _relpath /usr/lib/)
-(cd /usr/bin && check "../.."  _relpath /)
-(cd fakedir && check ".."  _relpath ..)
-(cd fakedir && check "../"  _relpath ../)
-(cd fakedir && check "../fakedir"  _relpath ../fakedir)
+x=$PWD/x
+check "a/b/c"  _relpath "$x/a/b/c" "$x"
+check "../a/b/c"  _relpath "$x/../a/b/c" "$x"
+check ""  _relpath "$x" "$x"
+check "a/b/c"  _relpath a/b/c "/"
+check "a/b/c"  _relpath /a/b/c "/"
+check ""  _relpath / "/"
+check "../lib"  _relpath /usr/lib "/usr/bin"
+check "../lib/"  _relpath /usr/lib/ "/usr/bin"
+check "../.."  _relpath / "/usr/bin"
+check ".."  _relpath .. "$PWD/fakedir"
+check "../"  _relpath ../ "$PWD/fakedir"
+check "../fakedir"  _relpath ../fakedir "$PWD/fakedir"


 SECTION _normpath
-check "/usr/lib"  _normpath /usr/../usr/bin/../lib
-check "/"  _normpath /a/b/c/../../..
-check "/"  _normpath /
-check "../a" _normpath ../a
-(cd fakedir && check "../a/b"  _normpath ../a/b)
-(cd fakedir && check ".." _normpath ..)
-(cd / && check "tuv"  _normpath a/b/../../tuv)
-(cd / && check ""  _normpath a/b/../..)
-check ".." _normpath ../
-check ".." _normpath ..
+check "/usr/lib"  _normpath /usr/../usr/bin/../lib "$x"
+check "/"  _normpath /a/b/c/../../.. "$x"
+check "/"  _normpath / "$x"
+check "../a" _normpath ../a "$x"
+check "../a/b"  _normpath ../a/b "$x/fakedir"
+check ".." _normpath .. "$x/fakedir"
+check "tuv"  _normpath a/b/../../tuv "/"
+check ""  _normpath a/b/../.. "/"
+check ".." _normpath ../ "$x"
+check ".." _normpath .. "$x"


 SECTION _find_dofile
--- a/redo/builder.py
+++ b/redo/builder.py
@ -1,5 +1,5 @@
 """Code for parallel-building a set of targets, if needed."""
-import sys, os, errno, stat, signal, time
+import errno, os, stat, signal, sys, tempfile, time
 from . import cycles, env, jobserver, logs, state, paths
 from .helpers import unlink, close_on_exec
 from .logs import debug2, err, warn, meta
@ -120,20 +120,14 @@ class _BuildJob(object):
    def __init__(self, t, sf, lock, shouldbuildfunc, donefunc):
        self.t = t  # original target name, not relative to env.v.BASE
        self.sf = sf
-        tmpbase = t
-        while not os.path.isdir(os.path.dirname(tmpbase) or '.'):
-            ofs = tmpbase.rfind('/')
-            assert ofs >= 0
-            tmpbase = tmpbase[:ofs] + '__' + tmpbase[ofs+1:]
-        self.tmpname1 = '%s.redo1.tmp' % tmpbase
-        self.tmpname2 = '%s.redo2.tmp' % tmpbase
+        self.tmpname = None
        self.lock = lock
        self.shouldbuildfunc = shouldbuildfunc
        self.donefunc = donefunc
        self.before_t = _try_stat(self.t)

        # attributes of the running process
-        self.f = None
+        self.outfile = None

    def start(self):
        """Actually start running this job in a subproc, if needed."""
@ -195,11 +189,32 @@ class _BuildJob(object):
            else:
                err('no rule to redo %r\n' % t)
                return self._finalize(1)
-        unlink(self.tmpname1)
-        unlink(self.tmpname2)
-        ffd = os.open(self.tmpname1, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666)
+        # There is no good place for us to pre-create a temp file for
+        # stdout.  The target dir might not exist yet, or it might currently
+        # exist but get wiped by the .do script.  Other dirs, like the one
+        # containing the .do file, might be mounted readonly.  We can put it
+        # in the system temp dir, but then we can't necessarily rename it to
+        # the target filename because it might cross filesystem boundaries.
+        # Also, if redo is interrupted, it would leave a temp file lying
+        # around.  To avoid all this, use mkstemp() to create a temp file
+        # wherever it wants to, and immediately unlink it, but keep a file
+        # handle open.  When the .do script finishes, we can copy the
+        # content out of that nameless file handle into a file in the same
+        # dir as the target (which by definition must now exist, if you
+        # wanted the target to exist).
+        #
+        # On the other hand, the $3 temp filename can be hardcoded to be in
+        # the target directory, even if that directory does not exist.
+        # It's not *redo*'s job to create that file.  The .do file will
+        # create it, if it wants, and it's the .do file's job to first ensure
+        # that the directory exists.
+        tmpbase = os.path.join(dodir, basename + ext)
+        self.tmpname = tmpbase + '.redo.tmp'
+        unlink(self.tmpname)
+        ffd, fname = tempfile.mkstemp(prefix='redo.', suffix='.tmp')
        close_on_exec(ffd, True)
-        self.f = os.fdopen(ffd, 'w+')
+        os.unlink(fname)
+        self.outfile = os.fdopen(ffd, 'w+')
        # this will run in the dofile's directory, so use only basenames here
        arg1 = basename + ext  # target name (including extension)
        arg2 = basename        # target name (without extension)
@ -207,8 +222,8 @@ class _BuildJob(object):
                dofile,
                arg1,
                arg2,
-                # temp output file name
-                state.relpath(os.path.abspath(self.tmpname2), dodir),
+                # $3 temp output file name
+                state.relpath(os.path.abspath(self.tmpname), dodir),
               ]
        if env.v.VERBOSE:
            argv[1] += 'v'
@ -287,8 +302,8 @@ class _BuildJob(object):
        cycles.add(self.lock.fid)
        if dodir:
            os.chdir(dodir)
-        os.dup2(self.f.fileno(), 1)
-        os.close(self.f.fileno())
+        os.dup2(self.outfile.fileno(), 1)
+        os.close(self.outfile.fileno())
        close_on_exec(1, False)
        if env.v.LOG:
            cur_inode = str(os.fstat(2).st_ino)
@ -340,21 +355,12 @@ class _BuildJob(object):
        (like writing directly to $1 instead of $3).  We also have to record
        the new file stamp data for the completed target.
        """
-        f = self.f
+        outfile = self.outfile
        before_t = self.before_t
        after_t = _try_stat(t)
-        st1 = os.fstat(f.fileno())
-        st1b = _try_stat(self.tmpname1)
-        st2 = _try_stat(self.tmpname2)
-        if not st1.st_size and not st1b:
-            warn("%s: %s deleted %s; don't do that.\n"
-                 % (_nice(t), argv[2], self.tmpname1))
-            # not fatal, continue
-        if st1.st_size > 0 and not st1b:
-            err('%s: %s wrote to stdout but deleted %s.\n'
-                % (_nice(t), argv[2], self.tmpname1))
-            rv = 207
-        elif (after_t and
+        st1 = os.fstat(outfile.fileno())
+        st2 = _try_stat(self.tmpname)
+        if (after_t and
                (not before_t or before_t.st_mtime != after_t.st_mtime) and
                not stat.S_ISDIR(after_t.st_mode)):
            err('%s modified %s directly!\n' % (argv[2], t))
@ -368,9 +374,11 @@ class _BuildJob(object):
            # FIXME: race condition here between updating stamp/is_generated
            # and actually renaming the files into place.  There needs to
            # be some kind of two-stage commit, I guess.
-            if st2:
+            if st1.st_size > 0:
+                # script wrote to stdout.  Copy its contents to the tmpfile.
+                unlink(self.tmpname)
                try:
-                    os.rename(self.tmpname2, t)
+                    newf = open(self.tmpname, 'w')
                except OSError, e:
                    dnt = os.path.dirname(t)
                    if not os.path.exists(dnt):
@ -380,23 +388,32 @@ class _BuildJob(object):
                    else:
                        # I don't know why this would happen, so raise the
                        # full exception if it ever does.
-                        err('%s: rename %s: %s\n' % (t, self.tmpname2, e))
+                        err('%s: save stdout to %s: %s\n'
+                            % (t, self.tmpname, e))
                        raise
-                unlink(self.tmpname1)
-            elif st1.st_size > 0:
-                try:
-                    os.rename(self.tmpname1, t)
-                except OSError, e:
-                    if e.errno == errno.ENOENT:
-                        unlink(t)
                else:
-                        err('%s: can\'t save stdout to %r: %s\n' %
-                            (argv[2], t, e.strerror))
-                        rv = 1000
+                    self.outfile.seek(0)
+                    while 1:
+                        b = self.outfile.read(1024*1024)
+                        if not b:
+                            break
+                        newf.write(b)
+                    newf.close()
+                    st2 = _try_stat(self.tmpname)
            if st2:
-                    os.unlink(self.tmpname2)
+                # either $3 file was created *or* stdout was written to.
+                # therefore tmpfile now exists.
+                try:
+                    # Atomically replace the target file
+                    os.rename(self.tmpname, t)
+                except OSError, e:
+                    # Nowadays self.tmpname is in the same directory as
+                    # t, so there's no very good reason for this to
+                    # fail. Thus, raise the full exception if it ever
+                    # does.
+                    err('%s: rename %s: %s\n' % (t, self.tmpname, e))
+                    raise
            else: # no output generated at all; that's ok
-                unlink(self.tmpname1)
                unlink(t)
            sf = self.sf
            sf.refresh()
@ -412,13 +429,12 @@ class _BuildJob(object):
                sf.update_stamp()
                sf.set_changed()
        else:
-            unlink(self.tmpname1)
-            unlink(self.tmpname2)
+            unlink(self.tmpname)
            sf = self.sf
            sf.set_failed()
        sf.zap_deps2()
        sf.save()
-        f.close()
+        outfile.close()
        meta('done', '%d %s' % (rv, state.target_relpath(self.t)))
        return rv

--- a/t/202-del/.gitignore
+++ b/t/202-del/.gitignore
@ -1,2 +1,3 @@
 deltest2
 /destruct
+/x
--- a/t/202-del/all.do
+++ b/t/202-del/all.do
@ -1 +1 @@
-redo deltest deltest2 deltest3
+redo deltest deltest2 deltest3 deltest4
--- a/t/202-del/clean.do
+++ b/t/202-del/clean.do
@ -1,2 +1,2 @@
-rm -rf destruct
+rm -rf destruct x
 rm -f deltest2 *~ .*~
--- a/t/202-del/default.spam1.do
+++ b/t/202-del/default.spam1.do
@ -0,0 +1,3 @@
+rm -rf x
+mkdir x
+echo 'stdout'
--- a/t/202-del/default.spam2.do
+++ b/t/202-del/default.spam2.do
@ -0,0 +1,4 @@
+rm -rf x
+mkdir x
+echo 'redir' >$3
+
--- a/t/202-del/deltest3.do
+++ b/t/202-del/deltest3.do
@ -10,7 +10,7 @@ cat >destruct2.do <<-EOF
 	echo 'stdout'
 EOF

-# deleting unused stdout file is a warning only
+# deleting unused stdout file is a warning at most
 redo destruct1 2>destruct1.log || exit 11
 [ "$(cat destruct1)" = "redir" ] || exit 12

--- a/t/202-del/deltest4.do
+++ b/t/202-del/deltest4.do
@ -0,0 +1,15 @@
+rm -rf x
+redo x/a.spam2
+[ "$(cat x/a.spam2)" = "redir" ] || exit 11
+redo x/a.spam2
+[ "$(cat x/a.spam2)" = "redir" ] || exit 12
+redo x/b.spam2
+[ "$(cat x/b.spam2)" = "redir" ] || exit 13
+
+rm -rf x
+redo x/a.spam1
+[ "$(cat x/a.spam1)" = "stdout" ] || exit 21
+redo x/a.spam1
+[ "$(cat x/a.spam1)" = "stdout" ] || exit 22
+redo x/b.spam1
+[ "$(cat x/b.spam1)" = "stdout" ] || exit 23