From 3923a7d3f8bdae0cce959c94556e48844c6cee41 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 2 Jan 2019 23:46:01 -0500 Subject: [PATCH] cookbook/container: example of building+running docker containers. This got... long... and complicated. But I think it's a really good demonstration of getting redo to do complicated things elegantly. At least, I hope it is. --- docs/.gitignore | 8 +- docs/clean.do | 3 +- docs/cookbook/all.do | 1 + docs/cookbook/clean.do | 1 + docs/cookbook/container/.gitignore | 17 + docs/cookbook/container/all.do | 88 ++ docs/cookbook/container/clean.do | 6 + docs/cookbook/container/debdownload.fs.do | 14 + docs/cookbook/container/debian.diffbase | 1 + docs/cookbook/container/debian.fs.do | 24 + docs/cookbook/container/debian.image.layers | 2 + docs/cookbook/container/debootstrap.fs.do | 19 + docs/cookbook/container/debootstrap.options | 3 + docs/cookbook/container/default.gz.do | 13 + docs/cookbook/container/default.image.do | 34 + docs/cookbook/container/default.initrd.do | 9 + docs/cookbook/container/default.layer.do | 9 + docs/cookbook/container/default.list.do | 24 + docs/cookbook/container/default.load.do | 3 + docs/cookbook/container/default.rundocker.do | 4 + docs/cookbook/container/default.runkvm.do | 34 + docs/cookbook/container/default.runlocal.do | 25 + docs/cookbook/container/default.sha256.do | 16 + docs/cookbook/container/dockjson.py | 21 + docs/cookbook/container/fileids.py | 21 + docs/cookbook/container/index.md | 821 +++++++++++++++++++ docs/cookbook/container/libs.diffbase | 1 + docs/cookbook/container/libs.fs.do | 21 + docs/cookbook/container/memcalc.py | 8 + docs/cookbook/container/need.sh | 9 + docs/cookbook/container/rdinit | 5 + docs/cookbook/container/simple.fs.do | 25 + docs/cookbook/container/simple.image.layers | 2 + docs/cookbook/container/template.json | 62 ++ docs/cookbook/container/try_fakeroot.sh | 13 + docs/cookbook/container/xclean.do | 3 + docs/t/.gitignore | 2 + mkdocs.yml | 10 +- 38 files changed, 1375 insertions(+), 7 deletions(-) create mode 100644 docs/cookbook/container/.gitignore create mode 100644 docs/cookbook/container/all.do create mode 100644 docs/cookbook/container/clean.do create mode 100644 docs/cookbook/container/debdownload.fs.do create mode 100644 docs/cookbook/container/debian.diffbase create mode 100644 docs/cookbook/container/debian.fs.do create mode 100644 docs/cookbook/container/debian.image.layers create mode 100644 docs/cookbook/container/debootstrap.fs.do create mode 100644 docs/cookbook/container/debootstrap.options create mode 100644 docs/cookbook/container/default.gz.do create mode 100644 docs/cookbook/container/default.image.do create mode 100644 docs/cookbook/container/default.initrd.do create mode 100644 docs/cookbook/container/default.layer.do create mode 100644 docs/cookbook/container/default.list.do create mode 100644 docs/cookbook/container/default.load.do create mode 100644 docs/cookbook/container/default.rundocker.do create mode 100644 docs/cookbook/container/default.runkvm.do create mode 100644 docs/cookbook/container/default.runlocal.do create mode 100644 docs/cookbook/container/default.sha256.do create mode 100755 docs/cookbook/container/dockjson.py create mode 100755 docs/cookbook/container/fileids.py create mode 100644 docs/cookbook/container/index.md create mode 100644 docs/cookbook/container/libs.diffbase create mode 100644 docs/cookbook/container/libs.fs.do create mode 100755 docs/cookbook/container/memcalc.py create mode 100755 docs/cookbook/container/need.sh create mode 100755 docs/cookbook/container/rdinit create mode 100644 docs/cookbook/container/simple.fs.do create mode 100644 docs/cookbook/container/simple.image.layers create mode 100644 docs/cookbook/container/template.json create mode 100755 docs/cookbook/container/try_fakeroot.sh create mode 100644 docs/cookbook/container/xclean.do create mode 100644 docs/t/.gitignore diff --git a/docs/.gitignore b/docs/.gitignore index 101eb2a..c474ed2 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,5 +1,5 @@ -*.1 +/*.1 /md-to-man -*.html -*.man.md -*.list +/*.html +/*.man.md +/*.list diff --git a/docs/clean.do b/docs/clean.do index 664484e..2154827 100644 --- a/docs/clean.do +++ b/docs/clean.do @@ -1,2 +1,3 @@ redo cookbook/clean -rm -f *~ .*~ *.1 t/*.1 md-to-man *.tmp t/*.tmp *.html */*.html *.list +rm -f *~ .*~ t/*~ t/.*~ *.1 t/*.1 \ + md-to-man *.tmp t/*.tmp *.html */*.html *.list diff --git a/docs/cookbook/all.do b/docs/cookbook/all.do index 01352a6..7d12d02 100644 --- a/docs/cookbook/all.do +++ b/docs/cookbook/all.do @@ -1,3 +1,4 @@ +export NO_SLOW_TESTS=1 for d in */all.do; do echo "${d%.do}" done | xargs redo-ifchange diff --git a/docs/cookbook/clean.do b/docs/cookbook/clean.do index 293816d..0002091 100644 --- a/docs/cookbook/clean.do +++ b/docs/cookbook/clean.do @@ -1,3 +1,4 @@ +rm -f *~ .*~ for d in */clean.do; do echo "${d%.do}" done | xargs redo diff --git a/docs/cookbook/container/.gitignore b/docs/cookbook/container/.gitignore new file mode 100644 index 0000000..6cc4ebd --- /dev/null +++ b/docs/cookbook/container/.gitignore @@ -0,0 +1,17 @@ +debdownload +debootstrap +debian +libs +simple +*.dockjson +*.fakeroot +*.fs +*.gz +*.image +*.initrd +*.layer +*.list +*.runkvm +*.runlocal +*.rundocker +*.sha256 diff --git a/docs/cookbook/container/all.do b/docs/cookbook/container/all.do new file mode 100644 index 0000000..a4c56df --- /dev/null +++ b/docs/cookbook/container/all.do @@ -0,0 +1,88 @@ +exec >&2 +no_simple= +no_debian= +no_runlocal= +no_runkvm= +no_docker= + +if ! ./need.sh ldd; then + echo "skipping simple image." + no_simple=1 +fi +if ! ./need.sh debootstrap eatmydata; then + echo "skipping debian image." + no_debian=1 +fi +if ! ./need.sh fakeroot fakechroot || + ! ./try_fakeroot.sh "x" true 2>/dev/null; then + echo "skipping chroot test." + no_runlocal=1 + echo "skipping debian image." + no_debian=1 +fi +if ! ./need.sh unshare || + ! unshare -r true 2>/dev/null; then + echo " -- 'unshare -r' command doesn't work." + echo "skipping chroot test." + no_runlocal=1 +fi +if ! ./need.sh busybox kvm; then + echo "skipping kvm test." + no_runkvm=1 +fi +if ! ./need.sh docker || + ! docker images >/dev/null; then + echo "skipping docker test." + no_docker=1 +fi +if [ -n "$NO_SLOW_TESTS" ]; then + echo " -- NO_SLOW_TESTS is set." + echo "skipping debian image." + no_debian=1 +fi + +add() { targets="$targets $*"; } + +[ -z "$no_simple" ] && add simple.image.gz +[ -z "$no_simple$no_runlocal" ] && add libs.runlocal +[ -z "$no_simple$no_runkvm" ] && add libs.runkvm +[ -z "$no_simple$no_docker" ] && add simple.rundocker + +[ -z "$no_debian" ] && add debian.image +[ -z "$no_debian$no_runlocal" ] && add debian.runlocal +[ -z "$no_debian$no_runkvm" ] && add debian.runkvm +[ -z "$no_debian$no_docker" ] && add debian.rundocker + +redo-ifchange $targets + +check() { + label=$1 + shift + printf "checking %-18s %-35s " "$label:" "$*" >&2 + if test "$@"; then + printf "ok\n" >&2 + else + printf "failed\n" >&2 + fi +} + +hellocheck() { + check "$1" "$(cat "$1")" = "Hello, world!" +} + +debcheck() { + check "$1" "$(cat "$1")" -ge "70" + check "$1" "$(cat "$1")" -le "100" +} + +if [ -z "$no_simple" ]; then + [ -n "$no_runlocal" ] || hellocheck libs.runlocal + [ -n "$no_runkvm" ] || hellocheck libs.runkvm + [ -n "$no_docker" ] || hellocheck simple.rundocker +fi + +if [ -z "$no_debian" ]; then + [ -n "$no_runlocal" ] || debcheck debian.runlocal + [ -n "$no_runkvm" ] || debcheck debian.runkvm + [ -n "$no_docker" ] || debcheck debian.rundocker +fi diff --git a/docs/cookbook/container/clean.do b/docs/cookbook/container/clean.do new file mode 100644 index 0000000..24431d6 --- /dev/null +++ b/docs/cookbook/container/clean.do @@ -0,0 +1,6 @@ +rm -rf *~ .*~ simple libs debian \ + *.fs debian.fakeroot *.gz \ + *.dockjson *.image *.image.gz *.initrd \ + *.layer *.list \ + *.runkvm *.runlocal *.rundocker \ + *.sha256 diff --git a/docs/cookbook/container/debdownload.fs.do b/docs/cookbook/container/debdownload.fs.do new file mode 100644 index 0000000..2d1758c --- /dev/null +++ b/docs/cookbook/container/debdownload.fs.do @@ -0,0 +1,14 @@ +exec >&2 +fs=${1%.fs} + +# let's *not* delete this directory; it's okay if previously-downloaded +# excess packages hang around in case we need them later. +#rm -rf "$fs" +mkdir -p "$fs" +redo-ifchange debootstrap.options +debootstrap \ + --download-only \ + --keep-debootstrap-dir \ + $(cat debootstrap.options) \ + "$fs" +redo-ifchange "$fs/debootstrap/debootstrap.log" diff --git a/docs/cookbook/container/debian.diffbase b/docs/cookbook/container/debian.diffbase new file mode 100644 index 0000000..cba8606 --- /dev/null +++ b/docs/cookbook/container/debian.diffbase @@ -0,0 +1 @@ +debootstrap diff --git a/docs/cookbook/container/debian.fs.do b/docs/cookbook/container/debian.fs.do new file mode 100644 index 0000000..eb94044 --- /dev/null +++ b/docs/cookbook/container/debian.fs.do @@ -0,0 +1,24 @@ +exec >&2 +fs=${1%.fs} +rm -rf "$fs" "$fs.fakeroot" + +redo-ifchange debootstrap.fs +fakeroot -i debootstrap.fakeroot -s "$fs.fakeroot" cp -a debootstrap/. "$fs" + +# Work around bug (in fakechroot?) where /lib64 symlink ends up pointing +# at an absolute path including $PWD, rather than inside the chroot. +# Rather than fix the symlink, we'll just make sure $PWD is a link to /, +# so that the "wrong" symlinks correctly resolve. +pwdir=$(dirname "$PWD/bootstrap/") +mkdir -p "$fs/$pwdir/debootstrap" +dots=$(echo "$pwdir/" | sed -e 's,[^/]*/,../,g') +ln -s "${dots}lib" "$fs/$pwdir/debootstrap/lib" + +# /init script is what we run in 'docker run' +cat >"$fs/init" <<-EOF + #!/bin/sh + dpkg -l | wc -l +EOF +chmod a+x "$fs/init" + +redo-ifchange "$fs/bin/sh" diff --git a/docs/cookbook/container/debian.image.layers b/docs/cookbook/container/debian.image.layers new file mode 100644 index 0000000..366fc05 --- /dev/null +++ b/docs/cookbook/container/debian.image.layers @@ -0,0 +1,2 @@ +debootstrap +debian diff --git a/docs/cookbook/container/debootstrap.fs.do b/docs/cookbook/container/debootstrap.fs.do new file mode 100644 index 0000000..279c44c --- /dev/null +++ b/docs/cookbook/container/debootstrap.fs.do @@ -0,0 +1,19 @@ +exec >&2 +fs=${1%.fs} +rm -rf "$fs" "$fs.fakeroot" + +redo-ifchange debdownload.fs debootstrap.options +cp -a debdownload/. "$fs" +eatmydata \ + fakechroot \ + fakeroot -s "$fs.fakeroot" \ + debootstrap $(cat debootstrap.options) "$fs" + +# Clean up installed package files +rm -f "$fs"/var/cache/apt/archives/*.deb \ + "$fs"/var/cache/apt/*.bin \ + "$fs"/var/lib/apt/lists/*Packages \ + "$fs"/var/lib/apt/lists/*Sources \ + "$fs"/var/lib/apt/lists/debootstrap* + +redo-ifchange "$fs/bin/sh" diff --git a/docs/cookbook/container/debootstrap.options b/docs/cookbook/container/debootstrap.options new file mode 100644 index 0000000..88b9c92 --- /dev/null +++ b/docs/cookbook/container/debootstrap.options @@ -0,0 +1,3 @@ +--variant=minbase +--include=busybox +stretch diff --git a/docs/cookbook/container/default.gz.do b/docs/cookbook/container/default.gz.do new file mode 100644 index 0000000..29debdb --- /dev/null +++ b/docs/cookbook/container/default.gz.do @@ -0,0 +1,13 @@ +redo-ifchange "$2" + +# On freebsd, 'gzip --rsyncable' fails but returns 0. +# We have to detect lack of --rsyncable some other way. +gzt=$(gzip --rsyncable -c /dev/null | wc -c) +if [ "$gzt" -gt 0 ]; then + # when available, --rsyncable makes compressed + # files much more efficient to rsync when they + # change slightly. + gzip --rsyncable -c <$2 >$3 +else + gzip -c <$2 >$3 +fi diff --git a/docs/cookbook/container/default.image.do b/docs/cookbook/container/default.image.do new file mode 100644 index 0000000..75d6aea --- /dev/null +++ b/docs/cookbook/container/default.image.do @@ -0,0 +1,34 @@ +redo-ifchange template.json "$1.layers" +layers=$(cat "$1.layers") + +dir=$3.tmp +rm -rf "$dir" +mkdir -p "$dir" + +# Build all layers in parallel +for layer in $layers; do + echo "$layer.list.sha256" + echo "$layer.layer" +done | xargs redo-ifchange + +ids= +parent= +for layer in $layers; do + read cid <$layer.list.sha256 + echo "layer: $cid $layer" >&2 + + # docker seems to order its image tarballs latest-first, + # so the base layer is last. We'll create in order from + # base layer to final layer, but create a tarball in the + # opposite order. + ids="$cid $ids" # prepend + + mkdir "$dir/$cid" + echo "1.0" >$dir/$cid/VERSION + ./dockjson.py "$layer" "$parent" >$dir/$cid/json + ln "$layer.layer" "$dir/$cid/layer.tar" + parent=$layer +done <$1.layers + +tar -C "$dir" -cf - $ids >$3 +rm -rf "$dir" diff --git a/docs/cookbook/container/default.initrd.do b/docs/cookbook/container/default.initrd.do new file mode 100644 index 0000000..c2330c6 --- /dev/null +++ b/docs/cookbook/container/default.initrd.do @@ -0,0 +1,9 @@ +redo-ifchange "$2.fs" rdinit +d=$PWD +fs=$2 +( + (cd "$fs" && find . -print0 | + "$d/try_fakeroot.sh" "$d/$2.fakeroot" \ + cpio -Hnewc -0 -o) + printf 'rdinit\0' | cpio -Hnewc -0 -o +) >$3 diff --git a/docs/cookbook/container/default.layer.do b/docs/cookbook/container/default.layer.do new file mode 100644 index 0000000..6228dd8 --- /dev/null +++ b/docs/cookbook/container/default.layer.do @@ -0,0 +1,9 @@ +d=$PWD +redo-ifchange "$2.fs" "$2.list" + +sed -e 's/ [^ ]*$//' <$2.list | +( + cd "$2" + "$d/try_fakeroot.sh" "$d/$2.fakeroot" \ + cpio -Hustar -o +) >$3 diff --git a/docs/cookbook/container/default.list.do b/docs/cookbook/container/default.list.do new file mode 100644 index 0000000..f7cdf1f --- /dev/null +++ b/docs/cookbook/container/default.list.do @@ -0,0 +1,24 @@ +d=$PWD +redo-ifchange "$2.fs" + +if [ -e "$2.diffbase" ]; then + redo-ifchange "$2.diffbase" + read diffbase <$2.diffbase + diffbase=$diffbase.list + redo-ifchange "$diffbase" +else + diffbase=/dev/null + redo-ifcreate "$2.diffbase" +fi + +( + cd "$2" && + find . -print | sort | "$d/try_fakeroot.sh" "$d/$2.fakeroot" "$d/fileids.py" +) >$1.tmp + +comm -1 -3 "$diffbase" "$1.tmp" >$3 +rm -f "$1.tmp" + +# Sanity check +nbytes=$(wc -c <"$3") +test $nbytes -gt 0 diff --git a/docs/cookbook/container/default.load.do b/docs/cookbook/container/default.load.do new file mode 100644 index 0000000..bfbf4c4 --- /dev/null +++ b/docs/cookbook/container/default.load.do @@ -0,0 +1,3 @@ +redo-ifchange "$2.image" +./need.sh docker +docker load <$2.image diff --git a/docs/cookbook/container/default.rundocker.do b/docs/cookbook/container/default.rundocker.do new file mode 100644 index 0000000..4421856 --- /dev/null +++ b/docs/cookbook/container/default.rundocker.do @@ -0,0 +1,4 @@ +redo-ifchange "$2.load" "$2.list.sha256" +./need.sh docker +read container_id <$2.list.sha256 +docker run "$container_id" diff --git a/docs/cookbook/container/default.runkvm.do b/docs/cookbook/container/default.runkvm.do new file mode 100644 index 0000000..6153620 --- /dev/null +++ b/docs/cookbook/container/default.runkvm.do @@ -0,0 +1,34 @@ +exec >&2 +./need.sh python kvm busybox + +redo-ifchange "$2.initrd" memcalc.py +rm -f "$3.out" "$3.code" + +# Linux only allows an initrd of size < 50% of RAM, +# so set a RAM amount based on the initrd size. +mem=$(./memcalc.py "$2.initrd") +echo "$2: kvm memory required: $mem" + +kvm \ + -m "$mem" \ + -kernel /boot/vmlinuz-$(uname -r) \ + -initrd "$2.initrd" \ + -append 'rdinit=/rdinit panic=1 console=ttyS0 loglevel=4' \ + -no-reboot \ + -display none \ + -chardev stdio,mux=on,id=char0 \ + -chardev file,id=char1,path="$3.out" \ + -chardev file,id=char2,path="$3.code" \ + -serial chardev:char0 \ + -serial chardev:char1 \ + -serial chardev:char2 +read rv <$3.code || true +[ -z "$rv" ] && exit 99 +if [ "$rv" -eq 0 ]; then + sed -e 's/\r//g' "$3.out" >$3 + echo "ok." >&2 +else + echo "kvm program returned error: $rv" >&2 +fi +rm -f "$3.out" "$3.code" +exit "$rv" diff --git a/docs/cookbook/container/default.runlocal.do b/docs/cookbook/container/default.runlocal.do new file mode 100644 index 0000000..8feb3a6 --- /dev/null +++ b/docs/cookbook/container/default.runlocal.do @@ -0,0 +1,25 @@ +redo-ifchange "$2.fs" + +./need.sh unshare + +set +e +unshare -r chroot "$2" /init >$3 +rv=$? +if [ "$rv" != 0 ]; then + f=/proc/sys/kernel/unprivileged_userns_clone + if [ -e "$f" ]; then + read v <$f + if [ "$v" -eq 0 ]; then + echo "Try: echo 1 >$f" >&2 + fi + fi + + f=/proc/sys/kernel/userns_restrict + if [ -e "$f" ]; then + read v <$f + if [ "$v" -ne 0 ]; then + echo "Try: echo 0 >$f" >&2 + fi + fi +fi +exit "$rv" diff --git a/docs/cookbook/container/default.sha256.do b/docs/cookbook/container/default.sha256.do new file mode 100644 index 0000000..bb6e042 --- /dev/null +++ b/docs/cookbook/container/default.sha256.do @@ -0,0 +1,16 @@ +#!/usr/bin/env python +"""Calculate the sha256 digest of a given file.""" +import hashlib, os, subprocess, sys + +subprocess.check_call([ + 'redo-ifchange', + sys.argv[2], +]) + +h = hashlib.sha256() +f = open(sys.argv[2]) +while 1: + b = f.read(65536) + if not b: break + h.update(b) +open(sys.argv[3], 'w').write(h.hexdigest() + '\n') diff --git a/docs/cookbook/container/dockjson.py b/docs/cookbook/container/dockjson.py new file mode 100755 index 0000000..cb01640 --- /dev/null +++ b/docs/cookbook/container/dockjson.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +"""Generate a docker 1.0-style manifest for a docker image.""" +import json, os, subprocess, sys, time + +j = json.load(open('template.json')) +layerid = open(sys.argv[1] + '.list.sha256').read().strip() +j['id'] = layerid + +if len(sys.argv) > 2 and sys.argv[2]: + parentid = open(sys.argv[2] + '.list.sha256').read().strip() + j['parent'] = parentid + +t = time.time() +gt = time.gmtime(t) +nsec = int(t * 1e9) % 1000000000 +j['created'] = time.strftime('%Y-%m-%dT%H:%M:%S', gt) + ('.%09dZ' % nsec) + +nbytes = os.stat(sys.argv[1] + '.layer').st_size +j['Size'] = nbytes + +json.dump(j, sys.stdout, indent=2) diff --git a/docs/cookbook/container/fileids.py b/docs/cookbook/container/fileids.py new file mode 100755 index 0000000..0bc6319 --- /dev/null +++ b/docs/cookbook/container/fileids.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +import hashlib, os, stat, sys + +for name in sys.stdin: + name = name[:-1] # skip terminating newline + st = os.lstat(name) + if stat.S_ISREG(st.st_mode): + h = hashlib.sha256() + f = open(name) + while 1: + b = f.read(65536) + if not b: break + h.update(b) + digest = h.hexdigest() + elif stat.S_ISLNK(st.st_mode): + digest = hashlib.sha256(os.readlink(name)).hexdigest() + else: + digest = '0' + print('%s %07o-%s-%s-%s' % ( + name, + st.st_mode, st.st_uid, st.st_gid, digest)) diff --git a/docs/cookbook/container/index.md b/docs/cookbook/container/index.md new file mode 100644 index 0000000..63cf2cf --- /dev/null +++ b/docs/cookbook/container/index.md @@ -0,0 +1,821 @@ +### Containers + +"Containers" became popular a few years ago with the emergence of +[Docker](https://www.docker.com/), but they are actually the result of a +long line of evolution starting with +[chroot](https://en.wikipedia.org/wiki/Chroot), a concept which dates all +the way back to 1979. The idea of a container, or a chroot, is to run a +process or set of processes in a (more or less) isolated environment that's +separate from your main operating system. + +The first iteration, chroot, only isolated the filesystem: chroot would +"change" the "root" directory (hence the name) to a subdirectory of the main +filesystem, then run a program that would see only files in that +subdirectory. Among other things, this was used as a way to prevent rogue +programs from accidentally damaging other files on the system. But it +wasn't particularly safe, especially because any program running with +administrator privileges could play tricks and eventually switch its root +back to the "real" root directory. Separately from security, though, it's +sometimes interesting to install a different operating system variant in a +subdirectory, then chroot into it and run programs that require that +operating system version. For example, if you're running the latest version +of Debian Linux, but you want to build an application that only builds +correctly on the Debian version from 5 years ago, you can install the +5-years-ago Debian files in a directory, chroot into that, and build your +application. The main limitation is that your "host" system and your chroot +environment share the same kernel version, and rogue programs usually can +find a way to escape the chroot, so it's not useful if your inner system is +running dangerous code. + +Partly in response to the limitations of chroot, "virtualization" started to +gain popularity around 2001, made famous by VMware. (IBM mainframes had +been doing something similar for a few decades, but not many people knew how +IBM mainframes worked.) Anyway, virtualization simulates a computer's actual +hardware and lets you run a different kernel on the virtual hardware, and a +filesystem inside that hardware. This has several advantages, including +much stricter security separation and the ability to run a different kernel +or even a different "guest" operating system than the one on the host. +Virtualization used to be pretty slow, but it's gotten faster and faster +over the years, especially with the introduction of "paravirtualization," +where we emulate special virtual-only "hardware" that needs special drivers +in the guest, in exchange for better performance. On Linux, the easiest +type of paravirtualization nowadays is +[kvm](https://www.linux-kvm.org/page/Main_Page) (kernel virtual machine), a +variant of [QEMU](https://www.qemu.org/). + +Virtual machines provide excellent security isolation, but at the expense of +performance, since every VM instance needs to have its own kernel, drivers, +init system, terminal emulators, memory management, swap space, and so on. +In response to this, various designers decided to go back to the old +`chroot` system and start fixing the isolation limits, one by one. The +history from here gets a bit complicated, since there are many, overlapping, +new APIs that vary between operating systems and versions. Eventually, this +collection of features congealed into what today we call "containers," in +products like [OpenVZ](https://en.wikipedia.org/wiki/OpenVZ), +[LXC](https://en.wikipedia.org/wiki/LXC), and (most famously) Docker. + +Why are we talking about all this? Because in this tutorial, we'll use +`redo` to build and run three kinds of containers (chroot, kvm, and docker), +sharing the same app build process between all three. redo's dependency and +parallelism management makes it easy to build multiple container types in +parallel, share code between different builds, and use different container +types (each with different tradeoffs) for different sorts of testing. + + +### A Hello World container + +Most Docker tutorials start at the highest level of abstraction: download +someone else's container, copy your program into it, and run your program in +a container. In the spirit of redo's low-level design, we're going to do +the opposite, starting at the very lowest level and building our way up. +The lowest level is, of course, Hello World, which we compiled (with redo of +course) in [an earlier tutorial](../hello/): +
+ +In fact, our earlier version of Hello World is a great example of redo's +safe recursion. Instead of producing an app as part of this tutorial, we'll +just `redo-ifchange ../hello/hello` from in our new project, confident that +redo will figure out any locking, dependency, consistency, and parallelism +issues. (This sort of thing usually doesn't work very well in `make`, +because you might get two parallel sub-instances of `make` recursing into +the `../hello` directory simultaneously, stomping on each other.) + +For our first "container," we're just going to build a usable chroot +environment containing our program (`/bin/hello`) and the bare minimum +requirements of an "operating system": a shell (`/bin/sh`), an init script +(`/init`, which will just be a symlink to `/bin/hello`), and, for debugging +purposes, the all-purpose [busybox](https://busybox.net/about.html) program. + +Here's a .do script that will build our simple container filesystem: +
+ +There's a catch here. Did you see it above? In current versions of redo, +the semantics of a .do script producing a directory as its output are +undefined. That's because the redo authors haven't yet figured out quite +what ought to happen when a .do file creates a directory. Or rather, +what happens *after* you create a directory? Can people `redo-ifchange` on +a file inside that newly created directory? What if the new directory +contains .do files? What if you `redo-ifchange` one of the sub-files before +you `redo-ifchange` the directory that contains it, so that the sub-file's +.do doesn't exist yet? And so on. We don't know. So for now, to stop you +from depending on this behaviour, we intentionally made it not work. + +Instead of that, you can have a .do script that produces a *different* +directory as a side effect. So above, `simple.fs.do` produces a directory +called `simple` when you run `redo simple.fs`. `simple.fs` is the +(incidentally empty) output, which is managed by redo and which other +scripts can depend upon using `redo-ifchange simple.fs`. The `simple` +directory just happens to materialize, and redo doesn't know anything about +it, which means it doesn't try to do anything about it, and you don't have +to care what redo's semantics for it might someday be. In other words, +maybe someday we'll find a more elegant way to handle .do files that create +directories, but we won't break your old code when we do. + +Okay? + +All right, one more catch. Operating systems are complicated, and there's +one more missing piece. Our Hello World program is *dynamically linked*, +which means it depends on shared libraries elsewhere in the system. You can +see exactly which ones by using the `ldd` command: +```shell +$ ldd ../hello/hello + linux-vdso.so.1 (0x00007ffd1ffca000) + libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f9ddf8fd000) + /lib64/ld-linux-x86-64.so.2 (0x00007f9ddfe9e000) +``` + +If we `chroot` into our simplistic "container" and try to run `hello`, it +won't work, because those libraries aren't available to programs inside the +chroot. That's the whole point of chroot, after all! + +How do we fix it? We get a list of the libraries with `ldd`, and then +we copy the libraries into place. + +Actually, for reasons we'll address below, let's make a copy of the new +filesystem and copy the new libraries into *that*: +
+ +So now there's a directory called `simple`, which contains our program and +some helper programs, and one called `libs`, which contains all that stuff, +plus the supporting libraries. That latter one is suitable for use with +chroot. + + +### Running a container with `unshare` and `chroot` + +So let's run it! We can teach redo how to start a program inside any chroot +by using a `default.do` script. In this case, we'll use +`default.runlocal.do`. With that file in place, when we run `redo +whatever.runlocal` (for any value of `whatever`), redo will first construct +the `whatever` directory (using `redo-ifchange whatever.fs`), and then +chroot into it and run `/init` inside. We'll collect stdout into the redo +output (ie. the file outside the chroot named `whatever.runlocal`). Also, +the stderr will go to redo's build log, readable with [redo-log](/redo-log/) +or on the console at build time, and if the `/init` script returns a nonzero +exit code, so will our script. As a result, the whole container execution +will act like a single node in our build process. It can depend on other +things, and other things can depend on it. + +Just one more thing: once upon a time, `chroot` was only available to +sysadmins, not normal users. And it's never a good idea to run your build +scripts as root. Luckily, Linux recently got a feature called "user +namespaces" (userns), which, among many other things, lets non-administrator +users use `chroot`. This is a really great addition. + +(Unfortunately, some people worry that user namespaces might create security +holes. From an abundance of caution, many OSes disable user namespaces for +non-administrators by default. So most of this script is just detecting +those situations so it can give you a useful warning. The useful part of +the script is basically just: `unshare -r chroot "$2" /init >$3`. Alas, +the subsequent error handling makes our script look long and complicated.) + +
+ +Speaking of error handling, the script above calls a script called +`./need.sh`, which is just a helper that prints a helpful error message and +aborts right away if the listed programs are not available to run, rather +than failing in a more complicated way. We'll use that script more +extensively below. +
+ +And that's it! A super simple container! +```shell +$ redo libs.runlocal +redo libs.runlocal +redo libs.fs +redo simple.fs + +$ time redo libs.runlocal +redo libs.runlocal + +real 0m0.112s +user 0m0.060s +sys 0m0.024s + +$ du libs +792 libs/bin +156 libs/lib64 +1656 libs/lib/x86_64-linux-gnu +1660 libs/lib +3752 libs + +cat libs.runlocal +Hello, world! +``` + +By the way, if this were a docker tutorial, it would still print "Hello, +world!" but your container would be >100 megabytes instead of 3.7 megabytes, +and it would have taken at least a couple of seconds to start instead of +0.11 seconds. But we'll get to that later. First, now that we have a +container, let's do more stuff with it! + +### Running a container with `kvm` and `initrd` + +Now you've seen chroot in action, but we can run almost the same container +in `kvm` (kernel virtual machine) instead, with even greater isolation. +`kvm` only runs on Linux, so for this step you'll need a Linux machine. And +for our example, we'll just have it run exactly the same kernel you're +already using, although kvm has the ability to use whatever kernel you want. +(You could even build a kernel as part of your redo project, redo-ifchange +it, and then run it with kvm. But we're not going to do that.) + +Besides a kernel, kvm needs an "initial ramdisk", which is where it'll get +its filesystem. (kvm can't exactly access your normal filesystem, +because it's emulating hardware, and there's no such thing as "filesystem +hardware." There are tools like the [9p +filesystem](https://www.kernel.org/doc/Documentation/filesystems/9p.txt) +that make this easier, but it's not available in all kernel builds, so we'll +avoid it for now.) + +"Initial ramdisk" (initrd) sounds fancy, but it's actually just a tarball +(technically, a [cpio](https://en.wikipedia.org/wiki/Cpio) archive) that the +kernel extracts into a ramdisk at boot time. Since we already have the +files, making the tarball is easy: +
+ +(Ignore that `try_fakeroot.sh` thing for now. We'll get to it a bit further +down. In our `simple.fs` example, it's a no-op anyway.) + +The main thing you need to know is that, unlike tar, cpio takes a list of +files on stdin instead of on the command line, and it doesn't recurse +automatically (so if you give it a directory name, it'll store an entry for +that directory, but not its contents, unless you also provide a list of its +contents). This gives us a lot of power, which we'll use later. For now +we're just doing basically `find | cpio -o`, which takes all the files and +directories and puts them in a cpio archive file. +```shell +$ redo libs.initrd +redo libs.initrd +5163 blocks +1 block + +$ cpio -t + +Configuring a virtual machine can get a little complicated, and there are a +million things we might want to do. One of the most important is setting +the size of the ramdisk needed for the initrd. Current Linux versions limit +the initrd to half the available RAM in the (virtual) machine, so to be +safe, we'll make sure to configure kvm to provide at least 3x as much RAM as +the size of the initrd. Here's a simple script to calculate that: +
+ +With all those pieces in place, actually executing the kvm is pretty +painless. Notice in particular the three serial ports we create: one for +the console (stderr), one for the output (stdout), and one for the exit +code: +
+ +And it works! +```shell +$ redo libs.runkvm +redo libs.runkvm +redo libs.initrd +5163 blocks +1 block +libs: kvm memory required: 70M +[ 0.306682] reboot: Power down +ok. + +$ time redo libs.runkvm +redo libs.runkvm +libs: kvm memory required: 70M +[ 0.295139] reboot: Power down +ok. + +real 0m0.887s +user 0m0.748s +sys 0m0.112s + +$ cat libs.runkvm +Hello, world! +``` + +Virtual machines have come a long way since 1999: we managed to build an +initrd, boot kvm, run our program, and shut down in only 0.9 seconds. It +could probably go even faster if we used a custom-built kernel with no +unnecessary drivers. + + +### A real Docker container + +Okay, that was fun, but nobody in real life cares about all these fast, +small, efficient isolation systems that are possible for mortals to +understand, right? We were promised a **Container System**, and a container +system has daemons, and authorization, and quotas, and random delays, and +some kind of Hub where I can download (and partially deduplicate) someone +else's multi-gigabyte Hello World images that are built in a highly +sophisticated enterprise-ready collaborative construction process. Come on, +tell me, can redo do **that**? + +Of course! But we're going to get there the long way. + +First, let's use the big heavy Container System with daemons and delays to +run our existing tiny understandable container. After that, we'll show how +to build a huge incomprehensible container that does the same thing, so your +co-workers will think you're normal. + +#### Docker and layers + +Normal people build their Docker containers using a +[Dockerfile](https://docs.docker.com/engine/reference/builder/). A +Dockerfile is sort of like a non-recursive redo, or maybe a Makefile, except +that it runs linearly, without the concept of dependencies or +parallelization. In that sense, I guess it's more like an IBM mainframe job +control script from 1970. It even has KEYWORDS in ALL CAPS, just like 1970. + +Dockerfiles do provide one really cool innovation over IBM job control +scripts, which is that they cache intermediate results so you don't have to +regenerate it every time. Basically, every step in a Dockerfile copies a +container, modifies it slightly, and saves the result for use in the next +step. If you modify step 17 and re-run the Dockerfile, it can just start +with the container produced by step 16, rather than going all the way back +to step 1. This works pretty well, although it's a bit expensive to start +and stop a container at each build step, and it's unclear when and how +interim containers are expunged from the cache later. And some of your +build steps are "install the operating system" and "install the compiler", +so each step produces a larger and larger container. A very common mistake +among Docker users is to leave a bunch of intermediate files (source code, +compilers, packages, etc) installed in the output container, bloating it up +far beyond what's actually needed to run the final application. + +Spoiler: we're not going to do it that way. + +Instead, let's use redo to try to get the same Dockerfile advantages +(multi-stage cache; cheap incremental rebuilds) without the disadvantages +(launching and unlaunching containers; mixing our build environment with our +final output). + +To understand how we'll do this, we need to talk about +[Layers](https://medium.com/@jessgreb01/digging-into-docker-layers-c22f948ed612). +Unlike our kvm initrd from earlier, a Docker image is not just a single +tarball; it's a sequence of tarballs, each containing the set of files +changed at each step of the build process. This layering system is how +Docker's caching and incremental update system works: if I incrementally +build an image starting from step 17, based on the pre-existing output from +step 16, then the final image can just re-use layers 1..16 and provide new +layers 17..n. Usually, the first few layers (install the operating system, +install the compilers, etc) are the biggest ones, so this means a new +version of an image takes very little space to store or transfer to a system +that already has the old one. + +The inside of a docker image looks like this: +```shell + $ tar -tf test.image +ae5419fd49e39e4dc0baab438925c1c6e4417c296a8b629fef5ea93aa6ea481c/ +ae5419fd49e39e4dc0baab438925c1c6e4417c296a8b629fef5ea93aa6ea481c/VERSION +ae5419fd49e39e4dc0baab438925c1c6e4417c296a8b629fef5ea93aa6ea481c/json +ae5419fd49e39e4dc0baab438925c1c6e4417c296a8b629fef5ea93aa6ea481c/layer.tar +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/ +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/VERSION +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/json +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/layer.tar +``` + +We could use redo to build a Docker image by simply making a single +`layer.tar` of the filesystem (like we did with initrd), adding a VERSION +and json file, and putting those three things into an outer taball. But if +we want a system that works as well as a Dockerfile, we'll have to make use +of multiple layers. + +Our `simple` container is already pretty tiny by container standards - 2.6MB +- but it's still a bit wasteful. Most of that space turns out to be from +the dynamic libraries we imported from the host OS. These libraries don't +change when we change Hello World! They belong in their own layer. + +Up above, in preparation for this moment, we created `libs.fs.do` to build a +separate filesystem, rather than adding the libraries inside +`simple.fs.do`, which would have been easier. Now we can make each of those +filesystems its own layer. + +There's one more complication: we did things a bit backwards. In a +Dockerfile, you install the libraries first, and then you install your +application. When you replace your application, you replace only the +topmost layer. We did it the other way around: we installed our +application and some debugging tools, then detected which libraries they +need and added a layer on top. The most recent versions of Docker, 1.10 and +above, are more efficient about handling layers changing in the middle of +the stack, but not everyone is using newer Docker versions yet, so let's try +to make things efficient for older Docker versions too. + +Luckily, since we're starting from first principles, in redo we can do +anything we want. We have to generate a tarball for each layer anyway, so +we can decide what goes into each layer and then we can put those layers in +whatever sequence we want. + +Let's start simple. A layer is just a tarball made of a set of files +(again, ignore the `try_fakeroot` stuff for now): +
+ +The magic, of course, is in deciding which files go into which layers. In +the script above, that's provided in the .list file corresponding to each +layer. The .list file is produced by `default.list.do`: +
+ +This requires a bit of explanation. First of all, you probably haven't seen +the very old, but little-known `comm` program before. It's often described +as "compare two sorted files" or "show common lines between two files." But +it actually does more than just showing common lines: it can show the lines +that are only in file #1, or only in file #2, or in both files. `comm -1 +-3` *suppresses* the output of lines that are only in #1 or that are in +both, so that it will print only the lines in the second file. + +If we want to make a `libs.layer` that contains only the files that are +*not* in `simple`, then we can use `comm -1 -3` to compare `simple` with +`libs`. + +Now, this script is supposed to be able to construct the file list for any +layer. To do that, it has to know what parent to compare each layer +against. We call that the "diffbase", and for layers that are based on +other layers, we put the name of the parent layer in its diffbase file: +
+ +(If there's no diffbase, then we use /dev/null as the diffbase. Because if +file #1 is empty, then *all* the lines are only in file #2, which is exactly +what we want.) + +There's just one more wrinkle: if we just compare lists of files, then we'll +detect newly-added files, but we won't detect *modified* files. To fix +this, we augment the file list with file checksums before the comparison +(using `fileids.py`), then strip the checksums back out in +`default.layer.do` before sending the resulting list to `cpio`. + +The augmented file list looks like this: +```shell +$ cat simple.list +. 0040755-0-0-0 +./bin 0040755-0-0-0 +./bin/busybox 0100755-0-0-ba34fb34865ba36fb9655e724266364f36155c93326b6b73f4e3d516f51f6fb2 +./bin/hello 0100755-0-0-22e4d2865e654f830f6bfc146e170846dde15185be675db4e9cd987cb02afa78 +./bin/sh 0100755-0-0-e803088e7938b328b0511957dcd0dd7b5600ec1940010c64dbd3814e3d75495f +./init 0120777-0-0-14bdc0fb069623c05620fc62589fe1f52ee6fb67a34deb447bf6f1f7e881f32a +``` + +(Side note: the augmentation needs to be added at the end of the line, not +the beginning so that the file list is still sorted afterwards. `comm` only +works correctly if both input files are sorted.) + +The script for augmenting the file list is fairly simple. It just reads a +list of filenames on stdin, checksums those files, and writes the augmented +list on stdout: +
+ +Just one more thing! Docker (before 1.10) deduplicates images by detecting +that they contain identical layers. When using a Dockerfile, the layers are +named automatically using random 256-bit numbers (UUIDs). Since Dockerfiles +usually don't regenerate earlier layers, the UUIDs of those earlier layers +won't change, so future images will contain layers with known UUIDs, so +Docker doesn't need to deduplicate them. + +We don't want to rely on never rebuilding layers. Instead, we'll adopt a +technique from newer Docker versions (post 1.10): we'll name layers after a +checksum of their contents. Now, we don't want to actually checksum the +`whatever.layer` file, because it turns out that tarballs contain a bunch of +irrelevant details, like inode numbers and +[mtimes](https://apenwarr.ca/log/20181113), so they'll have a different +checksum every time they're built. Instead, we'll make a digest of the +`whatever.list` file, which conveniently already has a checksum of each +file's contents, plus the interesting subset of the file's attributes. + +Docker expects 256-bit layer names, so we might normally generate a sha256 +digest using the `sha256sum` program, but that's not available on all +platforms. Let's write a python script to do the job instead. To make it +interesting, let's write it as a .do file, so we can generate the sha256 of +`anything` by asking for `redo-ifchange anything.sha256`. This is a good +example of how in redo, .do files can be written in any scripting language, +not just sh. +
+ +Let's test it out: +```shell +$ redo simple.list.sha256 +redo simple.list.sha256 +redo simple.list + +$ cat simple.list.sha256 +4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145 + +$ rm -rf simple + +$ redo simple.list.sha256 +redo simple.list.sha256 +redo simple.list +redo simple.fs + +$ cat simple.list.sha256 +4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145 +``` + +Consistent layer id across rebuilds! Perfect. + +#### Combining layers: building a Docker image + +We're almost there. Now that we can produce a tarball for each layer, we +have to produce the final tarball that contains all the layers in the right +order. For backward compatibility with older Docker versions, we also need +to produce a json "manifest" for each layer. In those old versions, each +layer was also its own container, so it needed to have all the same +attributes as a container, including a default program to run, list of open +ports, and so on. We're never going to use those values except for the +topmost layer, but they have to be there, so let's just auto-generate them. +Here's the script for customizing each layer's json file from a template: +
+ +And here's the empty template: +
+ +Now we just need to generate all the layers in a subdirectory, and tar them +together: +
+ +This requires a list of layers for each image we might want to create. +Here's the list of two layers for our `simple` container: +
+ +Finally, some people like to compress their Docker images for transport or +uploading to a repository. Here's a nice .do script that can produce the +.gz compressed version of any file: +
+ +Notice the use of `--rsyncable`. Very few people seem to know about this +gzip option, but it's immensely handy. Normally, if a few bytes change +early in a file, it completely changes gzip's output for all future bytes, +which means that incremental copying of new versions of a file (eg. using +`rsync`) is very inefficient. With `--rsyncable`, gzip does a bit of extra +work to make sure that small changes in one part of a file don't affect the +gzipped bytes later in the file, so an updated container will be able to +transfer a minimal number of bytes, even if you compress it. + +Let's try it out! +```shell +$ redo simple.image.gz +redo simple.image.gz +redo simple.image +redo libs.list.sha256 +redo libs.list +redo simple.list +redo libs.layer +3607 blocks +redo simple.list.sha256 +redo simple.layer +1569 blocks +layer: b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95 libs +layer: 4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145 simple + +flow:~/src/redo/docs/cookbook/container $ tar -tf simple.image.gz +4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145/ +4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145/VERSION +4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145/json +4d1fda9f598191a4bc281e5f6ac9c27493dbc8dd318e93a28b8a392a7105c145/layer.tar +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/ +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/VERSION +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/json +b65ae6e742f8946fdc3fbdccb326378162641f540e606d56e1e638c7988a5b95/layer.tar +``` + +In the above, notice how we build libs.layer first and simple.layer second, +because that's the order of the layers in `simple.image.layers`. But to +produce `libs.list` we need to compare the file list against `simple.list`, +so it declares a dependency on `simple.list`. + +The final `simple.image` tarball then includes the layers in *reverse* order +(topmost to bottommost), because that's how Docker does it. The id of the +resulting docker image is the id of the topmost layer, in this case +4d1fda9f. + +#### Loading and running a Docker image + +Phew! Okay, we finally have a completed Docker image in the format Docker +expects, and we didn't have to execute even one Dockerfile. Incidentally, +that means all of the above steps could run without having Docker installed, +and without having any permissions to talk to the local Docker daemon. +That's a pretty big improvement (in security and manageability) over running +a Dockerfile. + +The next step is to load the image into Docker, which is easy: +
+ +And finally, we can ask Docker to run our image, and capture its output like +we did, so long ago, in `default.runlocal.do` and `default.runkvm.do`: +
+ +The result is almost disappointing in its apparent simplicity: +```shell +$ time redo simple.rundocker +redo simple.rundocker +redo simple.load + +real 0m2.688s +user 0m0.068s +sys 0m0.036s + +$ cat simple.rundocker +Hello, world! +``` + +Notice that, for some reason, Docker takes 2.7s to load, launch and run our +tiny container. That's about 3x as long as it takes to boot and run a kvm +virtual machine up above with exactly the same files. This is kind of +weird, since containers are supposed to be much more lightweight than +virtual machines. I'm sure there's a very interesting explanation for this +phenomenon somewhere. For now, notice that you might save a lot of time by +initially testing your containers using `default.runlocal` (0.11 seconds) +instead of Docker (2.7 seconds), even if you intend to eventally deploy them +in Docker. + + +### A Debian-based container + +We're not done yet! We've built and run a Docker container the hard way, +but we haven't built and run an **unnecessarily wastefully huge** Docker +container the hard way. Let's do that next, by installing Debian in a +chroot, then packaging it up into a container. + +As we do that, we'll recycle almost all the redo infrastructure we built +earlier while creating our `simple` container. + +#### Interlude: Fakeroot + +It's finally time to talk about that mysterious `try_fakeroot.sh` script +that showed up a few times earlier. It looks like this: +
+ +[fakeroot](https://wiki.debian.org/FakeRoot) is a tool, originally developed +for the Debian project, that convinces your programs that they are running +as root, without actually running them as root. This is mainly so that they +can pretend to chown() files, without actually introducing security holes on +the host operating system. Debian uses this when building packages: they +compile the source, start fakeroot, install to a fakeroot directory, +make a tarball of that directory, then exit fakeroot. The tarball then +contains the permissions they want. + +Normally, fakeroot forgets all its simulated file ownership and permissions +whenever it exits. However, it has `-s` (save) and `-i` (input) options for +saving the permissions to a file and reloading the permissions from that +file, respectively. + +As we build our container layers, we need redo to continually enter +fakeroot, do some stuff, and exit it again. The `try_fakeroot.sh` script is +a helper to make that easier. + +#### Debootstrap + +The next Debian tool we should look at is +[debootstrap](https://wiki.debian.org/Debootstrap). This handy program +downloads and extracts the (supposedly) minimal packages necessary to build +an operational Debian system in a chroot-ready subdirectory. Nice! + +In order for debootstrap to work without being an administrator - and you +should not run your build system as root - we'll use fakeroot to let it +install all those packages. + +Unfortunately, debootstrap is rather slow, for two reasons: + +1. It has to download a bunch of things. +2. It has to install all those things. + +And after debootstrap has run, all we have is a Debian system, which by +itself isn't a very interesting container. (You usually want your container +to have an app so it does something specific.) + +Does this sound familiar? It sounds like a perfect candidate for Docker +layers. Let's make three layers: + +1. Download the packages. +2. Install the packages. +3. Install an app. + +Here's step one: +
+ +On top of that layer, we run the install process: +
+ +Since both steps run debootstrap and we might want to customize the set of +packages to download+install, we'll put the debootstrap options in their own +shared file: +
+ +And finally, we'll produce our "application" layer, which in this case is +just a shell script that counts then number of installed Debian packages: +
+ + +#### Building the Debian container + +Now that we have the three filesystems, let's actually generate the Docker +layers. But with a catch: we won't actually include the layer for step 1, +since all those package files will never be needed again. (Similarly, if we +were installing a compiler - and perhaps redo! - in the container so we +could build our application in a controlled environment, we might want to +omit the "install compiler" layers from the final product.) + +So we list just two layers: +
+ +And the 'debian' layer's diffbase is `debootstrap`, so we don't include the +same files twice: +
+ + +#### Running the Debian container + +This part is easy. All the parts are already in place. We'll just run +the existing `default.rundocker.do`: +```shell +$ time redo debian.rundocker +redo debian.rundocker +redo debian.load +redo debian.image +redo debian.list.sha256 +redo debian.list +redo debian.layer +12 blocks +layer: a542b5976e1329b7664d79041d982ec3d9f7949daddd73357fde17465891d51d debootstrap +layer: d5ded4835f8636fcf01f6ccad32125aaa1fe9e1827f48f64215b14066a50b9a7 debian + +real 0m7.313s +user 0m0.632s +sys 0m0.300s + +$ cat debian.rundocker +82 +``` + +It works! Apparently there are 82 Debian packages installed. It took 7.3 +seconds to load and run the docker image though, probably because it had to +transfer the full contents of those 82 packages over a socket to the docker +server, probably for security reasons, rather than just reading the files +straight from disk. Luckily, our chroot and kvm scripts also still work: +```shell +$ time redo debian.runlocal +redo debian.runlocal + +real 0m0.084s +user 0m0.052s +sys 0m0.004s + +$ cat debian.runlocal +82 + +$ time redo debian.runkvm +redo debian.runkvm +redo debian.initrd +193690 blocks +1 block +debian: kvm memory required: 346M +[ 0.375365] reboot: Power down +ok. + +real 0m3.445s +user 0m1.008s +sys 0m0.644s + +$ cat debian.runkvm +82 +``` + +#### Testing and housekeeping + +Let's finish up by providing the usual boilerplate. First, an `all.do` that +builds, runs, and tests all the images on all the container platforms. +This isn't a production build system, it's a subdirectory of the redo +package, so we'll skip softly, with a warning, if any of the components are +missing or nonfunctional. If you were doing this in a "real" system, you +could just let it abort when something is missing. +
+ +And here's a `redo clean` script that gets rid of (most of) the files +produced by the build. We say "most of" the files, because actually we +intentionally don't delete the debdownload and debootstrap directories. +Those take a really long time to build, and redo knows to rebuild them if +their dependencies (or .do files) change anyway. So instead of throwing +away their content on 'redo clean', we'll keep it around. +
+ +Still, we want a script that properly cleans up everything, so let's have +`redo xclean` (short for "extra clean") wipe out the last remaining +files: +
diff --git a/docs/cookbook/container/libs.diffbase b/docs/cookbook/container/libs.diffbase new file mode 100644 index 0000000..ab23474 --- /dev/null +++ b/docs/cookbook/container/libs.diffbase @@ -0,0 +1 @@ +simple diff --git a/docs/cookbook/container/libs.fs.do b/docs/cookbook/container/libs.fs.do new file mode 100644 index 0000000..d4a9052 --- /dev/null +++ b/docs/cookbook/container/libs.fs.do @@ -0,0 +1,21 @@ +exec >&2 + +fs=${1%.fs} +redo-ifchange simple.fs + +rm -rf "$fs" +cp -a simple/. "$fs" + +for full in "$fs"/bin/*; do + if [ -x "$full" ]; then + ldd "$full" | while read a b c junk; do + [ "$b" = "=>" ] && a=$c + if [ -e "$a" ]; then + mkdir -p "$fs/lib" "$fs/$(dirname "$a")" + cp -f "$a" "$fs/$a" + fi + done + fi +done + +redo-ifchange "$fs/bin/sh" diff --git a/docs/cookbook/container/memcalc.py b/docs/cookbook/container/memcalc.py new file mode 100755 index 0000000..2301874 --- /dev/null +++ b/docs/cookbook/container/memcalc.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +import os, sys +st = os.stat(sys.argv[1]) +megabytes = st.st_size // 1024 // 1024 +# initrd size is limited to 50% of available RAM. To be safe, we'll +# request at least 3x initrd size, and no less than 64M. +need = megabytes * 3 + 64 +print("%dM" % need) diff --git a/docs/cookbook/container/need.sh b/docs/cookbook/container/need.sh new file mode 100755 index 0000000..2fb8f2a --- /dev/null +++ b/docs/cookbook/container/need.sh @@ -0,0 +1,9 @@ +#!/bin/sh +fail=0 +for d in "$@"; do + if ! type "$d" >/dev/null 2>/dev/null; then + echo " -- missing tool: $d" >&2 + fail=1 + fi +done +exit "$fail" diff --git a/docs/cookbook/container/rdinit b/docs/cookbook/container/rdinit new file mode 100755 index 0000000..f36b1ee --- /dev/null +++ b/docs/cookbook/container/rdinit @@ -0,0 +1,5 @@ +#!/bin/sh +busybox mount -t devtmpfs none /dev +/init >/dev/ttyS1 +echo $? >/dev/ttyS2 +busybox poweroff -f diff --git a/docs/cookbook/container/simple.fs.do b/docs/cookbook/container/simple.fs.do new file mode 100644 index 0000000..c419f1c --- /dev/null +++ b/docs/cookbook/container/simple.fs.do @@ -0,0 +1,25 @@ +exec >&2 + +# We can pull in the 'hello' binary built in an earlier +# example. Notice that it's safe to have dependencies +# that cross directory boundaries, even when we're building +# both of those directories in parallel. +FILES=" + /bin/sh + ../hello/hello +" +if [ -x /bin/busybox ]; then + # Optional, except for runkvm + FILES="$FILES /bin/busybox" +else + redo-ifcreate /bin/busybox +fi +redo-ifchange $FILES + +fs=${1%.fs} +rm -rf "$fs" +mkdir -p "$fs/bin" +cp $FILES "$fs/bin/" +ln -s bin/hello "$fs/init" + +redo-ifchange "$fs/bin/sh" diff --git a/docs/cookbook/container/simple.image.layers b/docs/cookbook/container/simple.image.layers new file mode 100644 index 0000000..9a124ca --- /dev/null +++ b/docs/cookbook/container/simple.image.layers @@ -0,0 +1,2 @@ +libs +simple diff --git a/docs/cookbook/container/template.json b/docs/cookbook/container/template.json new file mode 100644 index 0000000..254a58c --- /dev/null +++ b/docs/cookbook/container/template.json @@ -0,0 +1,62 @@ +{ + "architecture": "amd64", + "comment": "Imported from -", + "config": { + "AttachStderr": false, + "AttachStdin": false, + "AttachStdout": false, + "Cmd": [ + "/init" + ], + "CpuShares": 0, + "Cpuset": "", + "Domainname": "", + "Entrypoint": null, + "Env": null, + "ExposedPorts": null, + "Hostname": "", + "Image": "", + "Labels": null, + "MacAddress": "", + "Memory": 0, + "MemorySwap": 0, + "NetworkDisabled": false, + "OnBuild": null, + "OpenStdin": false, + "PortSpecs": null, + "StdinOnce": false, + "Tty": false, + "User": "", + "Volumes": null, + "WorkingDir": "" + }, + "container_config": { + "AttachStderr": false, + "AttachStdin": false, + "AttachStdout": false, + "Cmd": null, + "CpuShares": 0, + "Cpuset": "", + "Domainname": "", + "Entrypoint": null, + "Env": null, + "ExposedPorts": null, + "Hostname": "", + "Image": "", + "Labels": null, + "MacAddress": "", + "Memory": 0, + "MemorySwap": 0, + "NetworkDisabled": false, + "OnBuild": null, + "OpenStdin": false, + "PortSpecs": null, + "StdinOnce": false, + "Tty": false, + "User": "", + "Volumes": null, + "WorkingDir": "" + }, + "docker_version": "1.6.2", + "os": "linux" +} diff --git a/docs/cookbook/container/try_fakeroot.sh b/docs/cookbook/container/try_fakeroot.sh new file mode 100755 index 0000000..de0af90 --- /dev/null +++ b/docs/cookbook/container/try_fakeroot.sh @@ -0,0 +1,13 @@ +#!/bin/sh +frfile=$1 +shift +broken= +fakeroot true 2>/dev/null || broken=1 +if [ -z "$broken" ] && [ -e "$frfile" ]; then + redo-ifchange "$frfile" + exec fakeroot -i "$frfile" "$@" +elif [ -z "$broken" ]; then + exec fakeroot "$@" +else + exec "$@" +fi diff --git a/docs/cookbook/container/xclean.do b/docs/cookbook/container/xclean.do new file mode 100644 index 0000000..4770bb0 --- /dev/null +++ b/docs/cookbook/container/xclean.do @@ -0,0 +1,3 @@ +redo clean +rm -rf debootstrap debdownload *.fakeroot + diff --git a/docs/t/.gitignore b/docs/t/.gitignore new file mode 100644 index 0000000..500303a --- /dev/null +++ b/docs/t/.gitignore @@ -0,0 +1,2 @@ +/*.1 +/*.html diff --git a/mkdocs.yml b/mkdocs.yml index 21ca880..30152c1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,6 +12,11 @@ extra_css: plugins: - exclude: glob: + - cookbook/container/simple/* + - cookbook/container/debdownload/* + - cookbook/container/debootstrap/* + - cookbook/container/debian/* + - cookbook/container/*.fakeroot - "t/*" - "*.tmp" - "*.gz" @@ -29,8 +34,9 @@ nav: - Roadmap.md - Cookbook: - Hello World (hello.do, redo-ifchange): cookbook/hello/index.md - - Text processing example (default.do, redo-whichdo, redo-always, redo-stamp): cookbook/defaults/index.md - - R plots and LaTeX to pdf (side effects, multiple outputs, autodepends): cookbook/latex/index.md + - Text substitution (default.do, redo-always, redo-stamp): cookbook/defaults/index.md + - R plots and LaTeX to pdf (side effects, multiple outputs): cookbook/latex/index.md + - Docker and kvm containers (from scratch): cookbook/container/index.md - FAQ: - Basics: FAQBasics.md - Semantics: FAQSemantics.md