From 2e84c1bc02d4e01fe2ab52aef67f6d55f9bfa5f9 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 28 Nov 2018 14:04:12 -0500 Subject: [PATCH] Docs/cookbook: add an R + latex example. This shows how to dynamically generate a plot in R+ggplot2, then embed it into a latex document, and compile it to pdf, all with proper autodependencies. --- Documentation/cookbook/latex/.gitignore | 7 + Documentation/cookbook/latex/all.do | 8 + Documentation/cookbook/latex/clean.do | 2 + Documentation/cookbook/latex/default.dvi.do | 2 + Documentation/cookbook/latex/default.pdf.do | 3 + Documentation/cookbook/latex/default.ps.do | 3 + .../cookbook/latex/default.runtex.do | 58 ++++ Documentation/cookbook/latex/discovery.txt | 1 + Documentation/cookbook/latex/index.md | 252 ++++++++++++++++++ Documentation/cookbook/latex/mpg.R | 4 + Documentation/cookbook/latex/mpg.eps.do | 7 + Documentation/cookbook/latex/paper.deps | 1 + Documentation/cookbook/latex/paper.latex | 17 ++ mkdocs.yml | 1 + 14 files changed, 366 insertions(+) create mode 100644 Documentation/cookbook/latex/.gitignore create mode 100644 Documentation/cookbook/latex/all.do create mode 100644 Documentation/cookbook/latex/clean.do create mode 100644 Documentation/cookbook/latex/default.dvi.do create mode 100644 Documentation/cookbook/latex/default.pdf.do create mode 100644 Documentation/cookbook/latex/default.ps.do create mode 100644 Documentation/cookbook/latex/default.runtex.do create mode 100644 Documentation/cookbook/latex/discovery.txt create mode 100644 Documentation/cookbook/latex/index.md create mode 100644 Documentation/cookbook/latex/mpg.R create mode 100644 Documentation/cookbook/latex/mpg.eps.do create mode 100644 Documentation/cookbook/latex/paper.deps create mode 100644 Documentation/cookbook/latex/paper.latex diff --git a/Documentation/cookbook/latex/.gitignore b/Documentation/cookbook/latex/.gitignore new file mode 100644 index 0000000..7a53fcf --- /dev/null +++ b/Documentation/cookbook/latex/.gitignore @@ 
-0,0 +1,7 @@ +*.eps +*.dvi +*.ps +*.pdf +*.tmp +*~ +.*~ \ No newline at end of file diff --git a/Documentation/cookbook/latex/all.do b/Documentation/cookbook/latex/all.do new file mode 100644 index 0000000..67d705c --- /dev/null +++ b/Documentation/cookbook/latex/all.do @@ -0,0 +1,8 @@ +for d in latex dvips dvipdf Rscript; do + if ! type "$d" >/dev/null 2>/dev/null; then + echo "$0: skipping: $d not installed." >&2 + exit 0 + fi +done + +redo-ifchange paper.pdf paper.ps diff --git a/Documentation/cookbook/latex/clean.do b/Documentation/cookbook/latex/clean.do new file mode 100644 index 0000000..1f40021 --- /dev/null +++ b/Documentation/cookbook/latex/clean.do @@ -0,0 +1,2 @@ +rm -f *.eps *.dvi *.ps *.pdf *~ .*~ +rm -rf *.tmp diff --git a/Documentation/cookbook/latex/default.dvi.do b/Documentation/cookbook/latex/default.dvi.do new file mode 100644 index 0000000..15c1585 --- /dev/null +++ b/Documentation/cookbook/latex/default.dvi.do @@ -0,0 +1,2 @@ +redo-ifchange "$2.runtex" +ln "$2.tmp/$2.dvi" "$3" diff --git a/Documentation/cookbook/latex/default.pdf.do b/Documentation/cookbook/latex/default.pdf.do new file mode 100644 index 0000000..541b8ce --- /dev/null +++ b/Documentation/cookbook/latex/default.pdf.do @@ -0,0 +1,3 @@ +exec >&2 +redo-ifchange "$2.dvi" +dvipdf "$2.dvi" "$3" diff --git a/Documentation/cookbook/latex/default.ps.do b/Documentation/cookbook/latex/default.ps.do new file mode 100644 index 0000000..c518e09 --- /dev/null +++ b/Documentation/cookbook/latex/default.ps.do @@ -0,0 +1,3 @@ +exec >&2 +redo-ifchange "$2.dvi" +dvips -o "$3" "$2.dvi" diff --git a/Documentation/cookbook/latex/default.runtex.do b/Documentation/cookbook/latex/default.runtex.do new file mode 100644 index 0000000..ad6a917 --- /dev/null +++ b/Documentation/cookbook/latex/default.runtex.do @@ -0,0 +1,58 @@ +# latex produces log output on stdout, which is +# not really correct. Send it to stderr instead. 
+exec >&2 + +# We depend on both the .latex file and its .deps +# file (which lists additional dependencies) +redo-ifchange "$2.latex" "$2.deps" + +# Next, we have to depend on each dependency in +# the .deps file. +cat "$2.deps" | xargs redo-ifchange + +tmp="$2.tmp" +rm -rf "$tmp" +mkdir -p "$tmp" + +# latex generates eg. the table of contents by +# using a list of references ($2.aux) generated +# during its run. The first time, the table of +# contents is empty, so we have to run again. +# But then the table of contents is non-empty, +# which might cause page numbers to change, and +# so on. So we have to keep re-running until it +# finally stops changing. +touch "$tmp/$2.aux.old" +ok= +for i in $(seq 5); do + latex --halt-on-error \ + --output-directory="$tmp" \ + --recorder \ + "$2.latex" </dev/null + if diff "$tmp/$2.aux.old" \ + "$tmp/$2.aux" >/dev/null; then + # .aux file converged, so we're done + ok=1 + break + fi + echo + echo "$0: $2.aux changed: try again (try #$i)" + echo + cp "$tmp/$2.aux" "$tmp/$2.aux.old" +done + +if [ "$ok" = "" ]; then + echo "$0: fatal: $2.aux did not converge!" + exit 10 +fi + +# If the newly produced .dvi disappears, we need +# to redo. +redo-ifchange "$tmp/$2.dvi" + +# With --recorder, latex produces a list of files +# it used during its run. Let's depend on all of +# them, so if they ever change, we'll redo. +grep ^INPUT "$tmp/$2.fls" | + cut -d' ' -f2 | + xargs redo-ifchange diff --git a/Documentation/cookbook/latex/discovery.txt b/Documentation/cookbook/latex/discovery.txt new file mode 100644 index 0000000..7ff0e2a --- /dev/null +++ b/Documentation/cookbook/latex/discovery.txt @@ -0,0 +1 @@ +It seems that \(E = m c^2\). diff --git a/Documentation/cookbook/latex/index.md b/Documentation/cookbook/latex/index.md new file mode 100644 index 0000000..042d495 --- /dev/null +++ b/Documentation/cookbook/latex/index.md @@ -0,0 +1,252 @@ +### A LaTeX typesetting example + +[LaTeX](https://www.latex-project.org/) is a typesetting system that's +especially popular in academia. 
Among other things, it lets you produce +postscript and pdf files from a set of (mostly text) input files. + +LaTeX documents often include images and charts. In our example, we'll show +how to auto-generate a chart for inclusion using an [R script with +ggplot2](https://ggplot2.tidyverse.org/). + +To play with this code on your own machine, get the [redo +source code](https://github.com/apenwarr/redo) and look in the +`Documentation/cookbook/latex/` directory. + + +### Generating a plot from an R script + +First, let's tell redo how to generate our chart. We'll use +the R language, and ask it to plot some of its sample data (the mpg, "miles +per gallon" data set) and save it to an eps (encapsulated postscript) file. +eps files are usually a good format for LaTeX embedded images, because they +scale to any printer or display resolution. + +First, let's make an R script that generates a plot: +
+ +And then a .do file to tie that into redo: +
+ +We can build and view the image: +```shell +$ redo mpg.eps +redo mpg.eps + +# View the file on Linux +$ evince mpg.eps + +# View the file on MacOS +$ open mpg.eps +``` + + +### Running the LaTeX processor + +Here's the first draft of our very important scientific paper: +
+ +Notice how it refers to the chart from above, `mpg.eps`, and a text file, +`discovery.txt`. Let's create the latter as a static file. +
+ +With all the parts of our document in place, we can now compile it directly +using `pdflatex`: +```shell +$ pdflatex paper.latex +This is pdfTeX, Version 3.14159265-2.6-1.40.17 (TeX Live 2016/Debian) (preloaded format=pdflatex) + restricted \write18 enabled. +entering extended mode +...[a lot of unnecessary diagnostic messages]... +Output written on paper.pdf (2 pages, 68257 bytes). +Transcript written on paper.log. +``` + +But this has a few problems. First of all, it doesn't understand +dependencies; if `mpg.R` changes, it won't know to rebuild `mpg.eps`. +Secondly, the TeX/LaTeX toolchain has an idiosyncrasy that means you might +have to rebuild your document more than once. In our example, we generate a +table of contents, but it ends up getting generated *before* processing the +rest of the content in the document, so it's initially blank. As it +continues, LaTeX produces a file called `paper.aux` with a list of the +references needed by the table of contents, and their page numbers. If we +run LaTeX over again, it'll use that to build a proper table of contents. + +Of course, life is not necessarily so easy. Once the table of contents +isn't blank, it might start to push content onto the next page. This will +change all the page numbers! So we'd have to do it one more time. And that +might lead to even more subtle problems, like a reference to page 99 +changing to page 100, which pushes a word onto the next page, which changes +some other page number, and so on. Thus, we need a script that will keep +looping, re-running LaTeX until `paper.aux` stabilizes. + +The whole script we'll use is below. Instead of running `pdflatex` +directly, we'll use the regular `latex` command, which produces a .dvi +(DeVice Independent) intermediate file which we can later turn into a pdf or +ps file. + +LaTeX produces a bunch of clutter files (like `paper.aux`) that can be used +in future runs, but which also make its execution nondeterministic. 
To +avoid that problem, we tell it to use a temporary `--output-directory` that +we delete and recreate before each build (although we might need to run +`latex` multiple times in one build, to get `paper.aux` to converge). +
+ + +### Virtual targets, side effects, and multiple outputs + +Why did we call our script `default.runtex.do`? Why not `default.pdf.do` or +`default.dvi.do`, depending what kind of file we ask LaTeX to produce? + +The problem is that the `latex` command actually produces several +files in that temporary directory, and we might want to keep them around. +If we name our .do file after only *one* of those outputs, things get messy. + +The biggest problem is that redo requires a .do file to write its output to +$3 (or stdout), so that it can guarantee the output gets replaced +atomically. When there is more than one output, at most one file can +be sent to $3; how do you choose which one? Even worse, some programs don't +even have the ability to choose the output filename; for an input of +`paper.latex`, the `latex` command just writes a bunch of files named +`paper.*` directly. You can't ask it to put just one of them in $3. + +The easiest way to handle this situation in redo is to use a "virtual +target", which is a target name that doesn't actually get created as a file, +and has only side effects. You've seen these before: when we use `all.do` +or `clean.do`, we don't expect to produce a file named `all` or `clean`. We +expect redo to run a collection of other commands. In `make`, these are +sometimes called ".PHONY rules" because of the way they are declared in a +`Makefile`. But the rules aren't phony, they really are executed; they just +don't produce output. So in redo we call them "virtual." + +When we `redo paper.runtex`, it builds our virtual target. There is no +`paper.runtex` file or directory generated. But as a side effect, a +directory named `paper.tmp` is created. + +(Side note: it's tempting to name the directory the same as the target. So +we could have a `paper.runtex` directory instead of `paper.tmp`. This is +not inherently a bad idea, but currently redo behaviour is undefined if you +redo-ifchange a directory. Directories are weird. 
If one file in that +directory disappears, does that mean you "modified" the output by hand? +What if two redo targets modify the same directory? Should we require +scripts to only atomically replace an entire output directory via $3? And +so on. We might carefully define this behaviour eventually, but for now, +it's better to use a separate directory name and avoid the undefined +behaviour.) + + +### Depending on side effects produced by virtual targets + +Next, we want to produce .pdf and .ps files from the collection of files +produced by the `latex` command, particularly `paper.tmp/paper.dvi`. To do +that, we have to bring our files back from the "virtual target" world into +the real world. + +Depending on virtual targets is easy; we'll just +`redo-ifchange paper.runtex`. Then we want to materialize `paper.dvi` from +the temporary files in `paper.tmp/paper.dvi`, which we can do with an +efficient [hardlink](https://en.wikipedia.org/wiki/Hard_link) (rather than +making an unnecessary copy), like this: +
+ +Notice that we *don't* do `redo-ifchange paper.tmp/paper.dvi`. That's +because redo has no knowledge of that file. If you ask redo to build that +file for you, it doesn't know how to do it. You have to ask for +`paper.runtex`, which you know - but redo doesn't know - will produce the +input file you want. Then you can safely use it. + +Once we have a .do file that produces the "real" (non-virtual, +non-side-effect) `paper.dvi` file, however, it's safe to depend directly on +it. Let's use that to produce our .ps and .pdf outputs: +
+
+ +(As above, we include `exec >&2` lines because LaTeX tools incorrectly write +their log messages to stdout. We need to redirect it all to stderr. That +way [redo-log](../../redo-log) can handle all the log output appropriately.) + + +### Explicit dependencies + +We've made a generalized script, `default.runtex.do`, that can compile any +.latex file and produce a .tmp directory with its output. But that's not +quite enough: different .latex files might have extra dependencies that need +to exist *before* the compilation can continue. In our case, we need the +auto-generated `mpg.eps` that we discussed above. + +To make that work, `default.runtex.do` looks for a .deps file with the same +name as the .latex file being processed. It contains just a list of extra +dependencies that need to be built. Here's ours: +
+ +You can use this same ".deps" technique in various different places in redo. +For example, you could have a default.do that can link a C program from any +set of .o files. To specify the right set of .o files for target `X`, +default.do might look in an `X.deps` or `X.list` file. If you later want to +get even fancier, you could make an `X.deps.do` that programmatically +generates the list of dependencies; for example, it might include one set of +files on win32 platforms and a different set on unix platforms. + + +### Autodependencies + +Our `paper.latex` file actually includes two files: `mpg.eps`, which we +explicitly depended upon above, and `discovery.txt`, which we didn't. The +latter is a static source file, so we can let redo discover it +automatically, based on the set of files that LaTeX opens while it runs. +The `latex` command has a `--recorder` option to do this; it produces a file +called `paper.tmp/paper.fls` (.fls is short for "File LiSt"). + +One of redo's best features is that you can declare dependencies *after* +you've done your build steps, when you have the best knowledge of which +files were actually needed. That's why in `default.runtex.do`, we parse the +.fls file and then redo-ifchange on its contents right at the end. + +(This brings up a rather subtle point about how redo works. When you run +redo-ifchange, redo adds to the list of files which, if they change, mean +your target needs to be rebuilt. But unlike make, redo will not actually +rebuild those files merely because they're listed as a dependency; it just +knows to rebuild your target, which means to run your .do file, which will +run redo-ifchange *again* if it still needs those input files to be fresh. + +This avoids an annoying problem in `make` where you can teach it about +which .h files your C program depended on last time, but if you change +A.c to no longer include X.h, and then delete X.h, make might complain +that X.h is missing, because A.c depended on it *last time*. 
redo will +simply notice that since X.h is missing, A.c needs to be recompiled, and let +your compilation .do script report an error, or not.) + +Anyway, this feature catches not just our `discovery.txt` dependency, but +also the implicit dependencies on various LaTeX template and font files, and +so on. If any of those change, our LaTeX file needs to be rebuilt. +```shell +$ redo --no-detail paper.pdf +redo paper.pdf +redo paper.dvi +redo paper.runtex +redo mpg.eps + +$ redo --no-detail paper.pdf +redo paper.pdf + +$ touch discovery.txt + +$ redo --no-detail paper.pdf +redo paper.pdf +redo paper.dvi +redo paper.runtex + +$ redo --no-detail paper.pdf +redo paper.pdf +``` + + +### Housekeeping + +As usual, to polish up our project, let's create an `all.do` and +`clean.do`. + +Because this project is included in the redo source and we don't want redo +to fail to build just because you don't have LaTeX or R installed, we'll +have `all.do` quit politely if the necessary tools are missing. +
+
diff --git a/Documentation/cookbook/latex/mpg.R b/Documentation/cookbook/latex/mpg.R new file mode 100644 index 0000000..da4b9a9 --- /dev/null +++ b/Documentation/cookbook/latex/mpg.R @@ -0,0 +1,4 @@ +library(ggplot2) + +qplot(mpg, wt, data = mtcars) + facet_wrap(~cyl) + theme_bw() +ggsave("mpg.new.eps", width=4, height=2, units='in') diff --git a/Documentation/cookbook/latex/mpg.eps.do b/Documentation/cookbook/latex/mpg.eps.do new file mode 100644 index 0000000..5fca829 --- /dev/null +++ b/Documentation/cookbook/latex/mpg.eps.do @@ -0,0 +1,7 @@ +redo-ifchange mpg.R +Rscript mpg.R >&2 +mv mpg.new.eps $3 + +# Some buggy ggplot2 versions produce this +# junk file; throw it away. +rm -f Rplots.pdf diff --git a/Documentation/cookbook/latex/paper.deps b/Documentation/cookbook/latex/paper.deps new file mode 100644 index 0000000..d670204 --- /dev/null +++ b/Documentation/cookbook/latex/paper.deps @@ -0,0 +1 @@ +mpg.eps diff --git a/Documentation/cookbook/latex/paper.latex b/Documentation/cookbook/latex/paper.latex new file mode 100644 index 0000000..4c443d6 --- /dev/null +++ b/Documentation/cookbook/latex/paper.latex @@ -0,0 +1,17 @@ +\documentclass{article} +\usepackage{graphicx} + +\title{A very brief note on relativity} +\author{The Redo Contributors} + +\begin{document} +\maketitle +\tableofcontents + +\newpage +\section{Amazing Discovery} +\input{discovery.txt} + +\section{Irrelevant Chart} +\includegraphics{mpg.eps} +\end{document} diff --git a/mkdocs.yml b/mkdocs.yml index 46f76c1..c1c51cd 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -15,6 +15,7 @@ pages: - Cookbook: - Hello World (hello.do, redo-ifchange): cookbook/hello/index.md - Text processing example (default.do, redo-whichdo, redo-always, redo-stamp): cookbook/defaults/index.md + - R plots and LaTeX to pdf (side effects, multiple outputs, autodepends): cookbook/latex/index.md - FAQ: - Basics: FAQBasics.md - Semantics: FAQSemantics.md