From 8dad22322588a9620c934edb9ae8d332e16f5a28 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 8 Dec 2010 20:16:26 -0800 Subject: [PATCH 01/20] flush-cache: run it as a separate program, not using 'source' That way it doesn't clutter up 'redo -x' as much. --- t/chdirtest.do | 6 +++--- t/deps/basic/test.do | 4 ++-- t/deps/dirtest/test.do | 6 +++--- t/deps/doublestatic.do | 2 +- t/deps/gentest.do | 6 +++--- t/deps/test1.do | 1 + t/flush-cache.sh | 1 + t/makedir2.do | 2 +- 8 files changed, 15 insertions(+), 13 deletions(-) mode change 100644 => 100755 t/flush-cache.sh diff --git a/t/chdirtest.do b/t/chdirtest.do index 302305c..ac0fe17 100644 --- a/t/chdirtest.do +++ b/t/chdirtest.do @@ -2,15 +2,15 @@ rm -f chdir1 redo chdir2 redo chdir3 -. ./flush-cache.sh +./flush-cache.sh redo-ifchange chdir3 rm -f chdir1 -. ./flush-cache.sh +./flush-cache.sh redo-ifchange chdir3 [ -e chdir1 ] || exit 77 rm -f chdir1 -. ./flush-cache.sh +./flush-cache.sh redo-ifchange chdir3 [ -e chdir1 ] || exit 78 diff --git a/t/deps/basic/test.do b/t/deps/basic/test.do index 90b9126..5bc14c3 100644 --- a/t/deps/basic/test.do +++ b/t/deps/basic/test.do @@ -1,10 +1,10 @@ rm -f *.out *.log -. ../../flush-cache.sh +../../flush-cache.sh redo-ifchange 1.out 2.out [ "$(cat 1.log | wc -l)" = 1 ] || exit 55 [ "$(cat 2.log | wc -l)" = 1 ] || exit 56 -. ../../flush-cache.sh +../../flush-cache.sh touch 1.in redo-ifchange 1.out 2.out [ "$(cat 1.log | wc -l)" = 2 ] || exit 57 diff --git a/t/deps/dirtest/test.do b/t/deps/dirtest/test.do index 2c5f8ee..c2fd244 100644 --- a/t/deps/dirtest/test.do +++ b/t/deps/dirtest/test.do @@ -1,11 +1,11 @@ rm -f log dir1/log dir1/stinky touch t1.do -. ../../flush-cache.sh +../../flush-cache.sh redo t1 touch t1.do -. ../../flush-cache.sh +../../flush-cache.sh redo t1 -. ../../flush-cache.sh +../../flush-cache.sh redo-ifchange t1 C1="$(wc -l genfile2.do -. ../flush-cache.sh +../flush-cache.sh redo genfile1 # this will cause a rebuild: # genfile1 depends on genfile2 depends on genfile2.do rm -f genfile2.do -. ../flush-cache.sh +../flush-cache.sh redo-ifchange genfile1 # but genfile2.do was gone last time, so genfile2 no longer depends on it. # thus, it can be considered up-to-date. Prior versions of redo had a bug # where the dependency on genfile2.do was never dropped. -. ../flush-cache.sh +../flush-cache.sh redo-ifchange genfile1 COUNT=$(wc -l &2 find "$REDO_BASE/.redo" -name 'built^*' -o -name 'mark^*' | xargs rm -f >&2 diff --git a/t/makedir2.do b/t/makedir2.do index 9918b69..e4673b3 100644 --- a/t/makedir2.do +++ b/t/makedir2.do @@ -1,7 +1,7 @@ rm -f makedir.log redo makedir touch makedir/outfile -. ./flush-cache.sh +./flush-cache.sh redo-ifchange makedir COUNT=$(wc -l Date: Tue, 7 Dec 2010 02:17:22 -0800 Subject: [PATCH 02/20] Switch state.py to use sqlite3 instead of filesystem-based stamps. It passes all tests when run serialized, but still gives weird errors (OperationalError: database is locked) when run with -j5. sqlite3 shouldn't be barfing just because the database is locked, since the default timeout is 5 seconds, and it's dying *way* faster than that. --- builder.py | 42 ++++--- redo-ifchange.py | 47 ++++---- redo-ifcreate.py | 3 +- state.py | 277 +++++++++++++++++++++++++++-------------------- t/flush-cache.sh | 7 +- vars.py | 1 + 6 files changed, 220 insertions(+), 157 deletions(-) diff --git a/builder.py b/builder.py index eeeccbb..c36b13a 100644 --- a/builder.py +++ b/builder.py @@ -20,10 +20,10 @@ def _find_do_file(t): for dofile,basename,ext in _possible_do_files(t): debug2('%s: %s ?\n' % (t, dofile)) if os.path.exists(dofile): - state.add_dep(t, 'm', dofile) + state.File(name=t).add_dep('m', dofile) return dofile,basename,ext else: - state.add_dep(t, 'c', dofile) + state.File(name=t).add_dep('c', dofile) return None,None,None @@ -53,12 +53,13 @@ class BuildJob: def start(self): assert(self.lock.owned) t = self.t + f = state.File(name=t) tmpname = self.tmpname if not self.shouldbuildfunc(t): # target doesn't need to be built; skip the whole task return self._after2(0) if (os.path.exists(t) and not os.path.exists(t + '/.') - and not state.is_generated(t)): + and not f.is_generated): # an existing source file that was not generated by us. # This step is mentioned by djb in his notes. # For example, a rule called default.c.do could be used to try @@ -67,20 +68,21 @@ class BuildJob: # FIXME: always refuse to redo any file that was modified outside # of redo? That would make it easy for someone to override a # file temporarily, and could be undone by deleting the file. - state.unmark_as_generated(t) - state.stamp_and_maybe_built(t) + debug2("-- static (%r)\n" % t) + f.set_static() + f.save() return self._after2(0) - state.start(t) + f.zap_deps() (dofile, basename, ext) = _find_do_file(t) if not dofile: if os.path.exists(t): - state.unmark_as_generated(t) - state.stamp_and_maybe_built(t) + f.is_generated = False + f.set_static() + f.save() return self._after2(0) else: err('no rule to make %r\n' % t) return self._after2(1) - state.stamp_and_maybe_built(dofile) unlink(tmpname) ffd = os.open(tmpname, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666) close_on_exec(ffd, True) @@ -97,13 +99,19 @@ class BuildJob: if vars.VERBOSE or vars.XTRACE: log_('\n') log('%s\n' % _nice(t)) self.argv = argv + f.is_generated = True + f.save() + dof = state.File(name=dofile) + dof.set_static() + dof.save() jwack.start_job(t, self._do_subproc, self._after) def _do_subproc(self): # careful: REDO_PWD was the PWD relative to the STARTPATH at the time # we *started* building the current target; but that target ran # redo-ifchange, and it might have done it from a different directory - # than we started it in. So os.getcwd() might be != REDO_PWD right now. + # than we started it in. So os.getcwd() might be != REDO_PWD right + # now. dn = os.path.dirname(self.t) newp = os.path.realpath(dn) os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR) @@ -153,11 +161,17 @@ class BuildJob: os.rename(tmpname, t) else: unlink(tmpname) - state.built(t) - state.stamp(t) + sf = state.File(name=t) + sf.is_generated=True + sf.update_stamp() + sf.set_changed() + sf.save() else: unlink(tmpname) - state.unstamp(t) + sf = state.File(name=t) + sf.stamp = None + sf.set_changed() + sf.save() f.close() if rv != 0: err('%s: exit code %d\n' % (_nice(t),rv)) @@ -226,7 +240,7 @@ def main(targets, shouldbuildfunc): assert(lock.owned) if vars.DEBUG_LOCKS: log('%s (...unlocked!)\n' % _nice(t)) - if state.stamped(t) == None: + if state.File(name=t).stamp == None: err('%s: failed in another thread\n' % _nice(t)) retcode[0] = 2 lock.unlock() diff --git a/redo-ifchange.py b/redo-ifchange.py index 7182b6d..dde061e 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -4,58 +4,59 @@ import vars, state, builder, jwack from helpers import debug, debug2, err, mkdirp, unlink -def dirty_deps(t, depth): - try: - st = os.stat(t) - realtime = st.st_mtime - except OSError: - st = None - realtime = 0 - - debug('%s?%s\n' % (depth, t)) - if state.isbuilt(t): +def dirty_deps(f, depth, max_changed): + debug('%s?%s\n' % (depth, f.name)) + + if f.changed_runid == None: + debug('%s-- DIRTY (never built)\n' % depth) + return True + if f.changed_runid > max_changed: debug('%s-- DIRTY (built)\n' % depth) - return True # has already been built during this session - if state.ismarked(t): - debug('%s-- CLEAN (marked)\n' % depth) + return True # has been built more recently than parent + if f.is_checked(): + debug('%s-- CLEAN (checked)\n' % depth) return False # has already been checked during this session - - stamptime = state.stamped(t) - if stamptime == None: + + if not f.stamp: debug('%s-- DIRTY (no stamp)\n' % depth) return True - if stamptime != realtime and not (st and stat.S_ISDIR(st.st_mode)): + if f.stamp != f.read_stamp(): debug('%s-- DIRTY (mtime)\n' % depth) return True - for mode,name in state.deps(t): + for mode,name in f.deps(): if mode == 'c': if os.path.exists(name): debug('%s-- DIRTY (created)\n' % depth) return True elif mode == 'm': - if dirty_deps(os.path.join(vars.BASE, name), depth + ' '): + f2 = state.File(name=os.path.join(vars.BASE, name)) + if dirty_deps(f2, depth = depth + ' ', + max_changed = f.changed_runid): debug('%s-- DIRTY (sub)\n' % depth) - state.unstamp(t) # optimization for future callers return True - state.mark(t) + f.set_checked() + f.save() return False def should_build(t): - return not state.isbuilt(t) and dirty_deps(t, depth = '') + f = state.File(name=t) + return dirty_deps(f, depth = '', max_changed = vars.RUNID) rv = 202 try: me = os.path.join(vars.STARTDIR, os.path.join(vars.PWD, vars.TARGET)) + f = state.File(name=me) debug2('TARGET: %r %r %r\n' % (vars.STARTDIR, vars.PWD, vars.TARGET)) try: targets = sys.argv[1:] for t in targets: - state.add_dep(me, 'm', t) + f.add_dep('m', t) + f.save() rv = builder.main(targets, should_build) finally: jwack.force_return_tokens() diff --git a/redo-ifcreate.py b/redo-ifcreate.py index 2794888..9d862c9 100755 --- a/redo-ifcreate.py +++ b/redo-ifcreate.py @@ -5,11 +5,12 @@ from helpers import err, mkdirp try: + me = state.File(name=vars.TARGET) for t in sys.argv[1:]: if os.path.exists(t): err('redo-ifcreate: error: %r already exists\n' % t) sys.exit(1) else: - state.add_dep(vars.TARGET, 'c', t) + me.add_dep('c', t) except KeyboardInterrupt: sys.exit(200) diff --git a/state.py b/state.py index 2986c15..17a9e35 100644 --- a/state.py +++ b/state.py @@ -1,20 +1,71 @@ -import sys, os, errno, glob +import sys, os, errno, glob, stat, sqlite3 import vars from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec +SCHEMA_VER=7 + +_db = None +def db(): + global _db + if _db: + return _db + dbdir = '%s/.redo' % vars.BASE + dbfile = '%s/db.sqlite3' % dbdir + mkdirp(dbdir) + must_create = not os.path.exists(dbfile) + if not must_create: + _db = sqlite3.connect(dbfile) + try: + row = _db.cursor().execute("select version from Schema").fetchone() + except sqlite3.OperationalError: + row = None + ver = row and row[0] or None + if ver != SCHEMA_VER: + err("state database: discarding v%s (wanted v%s)\n" + % (ver, SCHEMA_VER)) + must_create = True + _db = None + if must_create: + unlink(dbfile) + _db = sqlite3.connect(dbfile) + _db.execute("create table Schema (version int)") + _db.execute("create table Runid " + " (id integer primary key autoincrement)") + _db.execute("create table Files (" + " name not null primary key, " + " is_generated int, " + " checked_runid int, " + " changed_runid int, " + " stamp, csum)") + _db.execute("create table Deps " + " (target int, source int, mode not null, primary key (target,source))") + #_db.execute("create unique index Files_name on Files (name)") + #_db.execute("create unique index Deps_ix on Deps (target, source)") + _db.execute("create index Deps_src on Deps (source)") + _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER]) + _db.execute("insert into Runid default values") + _db.execute("insert into Runid default values") + _db.commit() + + if not vars.RUNID: + _db.execute("insert into Runid default values") + _db.commit() + vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0] + os.environ['REDO_RUNID'] = str(vars.RUNID) + + _db.execute("pragma journal_mode = PERSIST") + _db.execute("pragma synchronous = off") + return _db + def init(): # FIXME: just wiping out all the locks is kind of cheating. But we # only do this from the toplevel redo process, so unless the user # deliberately starts more than one redo on the same repository, it's # sort of ok. - mkdirp('%s/.redo' % vars.BASE) + db() for f in glob.glob('%s/.redo/lock*' % vars.BASE): os.unlink(f) - for f in glob.glob('%s/.redo/mark^*' % vars.BASE): - os.unlink(f) - for f in glob.glob('%s/.redo/built^*' % vars.BASE): - os.unlink(f) _insane = None @@ -46,7 +97,7 @@ def relpath(t, base): return '/'.join(tparts) -def _sname(typ, t): +def xx_sname(typ, t): # FIXME: t.replace(...) is non-reversible and non-unique here! tnew = relpath(t, vars.BASE) v = vars.BASE + ('/.redo/%s^%s' % (typ, tnew.replace('/', '^'))) @@ -55,128 +106,120 @@ def _sname(typ, t): return v -def add_dep(t, mode, dep): - sn = _sname('dep', t) - reldep = relpath(dep, vars.BASE) - debug2('add-dep: %r < %s %r\n' % (sn, mode, reldep)) +class File(object): + __slots__ = ['id', 'name', 'is_generated', + 'checked_runid', 'changed_runid', + 'stamp', 'csum'] - open(sn, 'a').write('%s %s\n' % (mode, reldep)) - - -def deps(t): - for line in open(_sname('dep', t)).readlines(): - assert(line[0] in ('c','m')) - assert(line[1] == ' ') - assert(line[-1] == '\n') - mode = line[0] - name = line[2:-1] - yield mode,name - - -def _stampname(t): - return _sname('stamp', t) - - -def stamp(t): - mark(t) - stampfile = _stampname(t) - newstampfile = _sname('stamp' + str(os.getpid()), t) - depfile = _sname('dep', t) - if not os.path.exists(vars.BASE + '/.redo'): - # .redo might not exist in a 'make clean' target - return - open(newstampfile, 'w').close() - try: - mtime = os.stat(t).st_mtime - except OSError: - mtime = 0 - os.utime(newstampfile, (mtime, mtime)) - os.rename(newstampfile, stampfile) - open(depfile, 'a').close() - - -def unstamp(t): - unlink(_stampname(t)) - unlink(_sname('dep', t)) - - -def unmark_as_generated(t): - unstamp(t) - unlink(_sname('gen', t)) - - -def stamped(t): - try: - stamptime = os.stat(_stampname(t)).st_mtime - except OSError, e: - if e.errno == errno.ENOENT: - return None + def __init__(self, id=None, name=None): + q = ('select rowid, name, is_generated, checked_runid, changed_runid, ' + ' stamp, csum ' + ' from Files ') + if id != None: + q += 'where rowid=?' + l = [id] + elif name != None: + name = relpath(name, vars.BASE) + q += 'where name=?' + l = [name] else: - raise - return stamptime + raise Exception('name or id must be set') + d = db() + row = d.execute(q, l).fetchone() + if not row: + if not name: + raise Exception('File with id=%r not found and ' + 'name not given' % id) + d.execute('insert into Files (name) values (?)', [name]) + d.commit() + row = d.execute(q, l).fetchone() + assert(row) + (self.id, self.name, self.is_generated, + self.checked_runid, self.changed_runid, + self.stamp, self.csum) = row + def save(self): + if not os.path.exists('%s/.redo' % vars.BASE): + # this might happen if 'make clean' removes the .redo dir + return + d = db() + d.execute('update Files set ' + ' is_generated=?, checked_runid=?, changed_runid=?, ' + ' stamp=?, csum=? ' + ' where rowid=?', + [self.is_generated, self.checked_runid, self.changed_runid, + self.stamp, self.csum, + self.id]) + d.commit() -def built(t): - try: - open(_sname('built', t), 'w').close() - except IOError, e: - if e.errno == errno.ENOENT: - pass # may happen if someone deletes our .redo dir - else: - raise - - -_builts = {} -def isbuilt(t): - if _builts.get(t): - return True - if os.path.exists(_sname('built', t)): - _builts[t] = True - return True - - -# stamps the given input file, but only considers it to have been "built" if its -# mtime has changed. This is useful for static (non-generated) files. -def stamp_and_maybe_built(t): - if stamped(t) != os.stat(t).st_mtime: - built(t) - stamp(t) - + def set_checked(self): + self.checked_runid = vars.RUNID -def mark(t): - try: - open(_sname('mark', t), 'w').close() - except IOError, e: - if e.errno == errno.ENOENT: - pass # may happen if someone deletes our .redo dir + def set_changed(self): + debug2('BUILT: %r (%r)\n' % (self.name, self.stamp)) + self.changed_runid = vars.RUNID + + def set_static(self): + self.update_stamp() + + def update_stamp(self): + newstamp = self.read_stamp() + if newstamp != self.stamp: + debug2("STAMP: %s: %r -> %r\n" % (self.name, self.stamp, newstamp)) + self.stamp = newstamp + self.set_changed() + + def is_changed(self): + return self.changed_runid and self.changed_runid >= vars.RUNID + + def is_checked(self): + return (self.checked_runid and self.checked_runid >= vars.RUNID + and not (self.changed_runid + and self.changed_runid >= self.checked_runid)) + + def deps(self): + q = "select mode, source from Deps where target=?" + for mode,source_id in db().execute(q, [self.id]): + assert(mode in ('c', 'm')) + name = File(id=source_id).name + yield mode,name + + def zap_deps(self): + debug2('zap-deps: %r\n' % self.name) + db().execute('delete from Deps where target=?', [self.id]) + db().commit() + + def add_dep(self, mode, dep): + src = File(name=dep) + reldep = relpath(dep, vars.BASE) + debug2('add-dep: %r < %s %r\n' % (self.name, mode, reldep)) + assert(src.name == reldep) + d = db() + d.execute("delete from Deps where target=? and source=?", + [self.id, src.id]) + d.execute("insert into Deps " + " (target, mode, source) values (?,?,?)", + [self.id, mode, src.id]) + d.commit() + + def read_stamp(self): + try: + st = os.stat(os.path.join(vars.BASE, self.name)) + except OSError: + return '0' # does not exist + if stat.S_ISDIR(st.st_mode): + return 'dir' # the timestamp of a directory is meaningless else: - raise - - -_marks = {} -def ismarked(t): - if _marks.get(t): - return True - if os.path.exists(_sname('mark', t)): - _marks[t] = True - return True - - -def is_generated(t): - return os.path.exists(_sname('gen', t)) - - -def start(t): - unstamp(t) - open(_sname('dep', t), 'w').close() - open(_sname('gen', t), 'w').close() # it's definitely a generated file + # a "unique identifier" stamp for a regular file + return str((st.st_ctime, st.st_mtime, st.st_size, st.st_ino)) + class Lock: def __init__(self, t): self.owned = False self.rfd = self.wfd = None - self.lockname = _sname('lock', t) + self.lockname = xx_sname('lock', t) def __del__(self): if self.owned: diff --git a/t/flush-cache.sh b/t/flush-cache.sh index 4f8f8f9..318838e 100755 --- a/t/flush-cache.sh +++ b/t/flush-cache.sh @@ -1,4 +1,7 @@ #!/bin/sh #echo "Flushing redo cache..." >&2 -find "$REDO_BASE/.redo" -name 'built^*' -o -name 'mark^*' | - xargs rm -f >&2 +( + echo "update Files set checked_runid=null;" + echo "update Files set changed_runid=changed_runid-1;" + #echo "update Files set stamp='dirty' where id in (select distinct target from Deps);" +) | sqlite3 "$REDO_BASE/.redo/db.sqlite3" diff --git a/vars.py b/vars.py index 54421a0..0547030 100644 --- a/vars.py +++ b/vars.py @@ -18,6 +18,7 @@ XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0 KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0 SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0 STARTDIR = os.environ['REDO_STARTDIR'] +RUNID = atoi.atoi(os.environ.get('REDO_RUNID')) or None BASE = os.environ['REDO_BASE'] while BASE and BASE.endswith('/'): BASE = BASE[:-1] From 9e36106642c27e4c9cde2f5b2f6e67b41bb6a8a0 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Wed, 8 Dec 2010 21:40:42 -0800 Subject: [PATCH 03/20] sqlite3: configure the timeout explicitly. In flush-cache.sh, we have to do this, because the sqlite3 command-line tool sets it to zero. Inevitably during parallel testing, it'll end up contending for a lock, and we really want it to wait a bit. In state.py, it's not as important since the default is nonzero. But python-sqlite3's default of 5 seconds makes me a little too nervous; I can imagine a disk write waiting for more than 5 seconds sometime. So let's use 60 instead. --- state.py | 5 +++-- t/flush-cache.sh | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/state.py b/state.py index 17a9e35..86a7b0d 100644 --- a/state.py +++ b/state.py @@ -3,6 +3,7 @@ import vars from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec SCHEMA_VER=7 +TIMEOUT=60 _db = None def db(): @@ -14,7 +15,7 @@ def db(): mkdirp(dbdir) must_create = not os.path.exists(dbfile) if not must_create: - _db = sqlite3.connect(dbfile) + _db = sqlite3.connect(dbfile, timeout=TIMEOUT) try: row = _db.cursor().execute("select version from Schema").fetchone() except sqlite3.OperationalError: @@ -27,7 +28,7 @@ def db(): _db = None if must_create: unlink(dbfile) - _db = sqlite3.connect(dbfile) + _db = sqlite3.connect(dbfile, timeout=TIMEOUT) _db.execute("create table Schema (version int)") _db.execute("create table Runid " " (id integer primary key autoincrement)") diff --git a/t/flush-cache.sh b/t/flush-cache.sh index 318838e..e464c96 100755 --- a/t/flush-cache.sh +++ b/t/flush-cache.sh @@ -1,6 +1,7 @@ #!/bin/sh #echo "Flushing redo cache..." >&2 ( + echo ".timeout 5000" echo "update Files set checked_runid=null;" echo "update Files set changed_runid=changed_runid-1;" #echo "update Files set stamp='dirty' where id in (select distinct target from Deps);" From f4535be0cdeea29e7abd421b2e43ea4f2f7229e4 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 00:45:13 -0800 Subject: [PATCH 04/20] Fix a deadlock. We were holding a database open with a read lock while a child redo might need to open it with a write lock. --- state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/state.py b/state.py index 86a7b0d..3f56248 100644 --- a/state.py +++ b/state.py @@ -180,7 +180,7 @@ class File(object): def deps(self): q = "select mode, source from Deps where target=?" - for mode,source_id in db().execute(q, [self.id]): + for mode,source_id in db().execute(q, [self.id]).fetchall(): assert(mode in ('c', 'm')) name = File(id=source_id).name yield mode,name From b86a32d33d21346dbe1d9542ff8843525d796a01 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 01:43:13 -0800 Subject: [PATCH 05/20] flush-cache.sh: for speed, disable sqlite's synchronous mode. --- t/flush-cache.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/flush-cache.sh b/t/flush-cache.sh index e464c96..4e04d03 100755 --- a/t/flush-cache.sh +++ b/t/flush-cache.sh @@ -2,7 +2,7 @@ #echo "Flushing redo cache..." >&2 ( echo ".timeout 5000" - echo "update Files set checked_runid=null;" - echo "update Files set changed_runid=changed_runid-1;" - #echo "update Files set stamp='dirty' where id in (select distinct target from Deps);" + echo "pragma synchronous = off;" + echo "update Files set checked_runid=null, " \ + " changed_runid=changed_runid-1;" ) | sqlite3 "$REDO_BASE/.redo/db.sqlite3" From c339359f04cb77fadfa28902ddd8509429cf4404 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 01:56:17 -0800 Subject: [PATCH 06/20] Schema cleanup. --- state.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/state.py b/state.py index 3f56248..1f6e49b 100644 --- a/state.py +++ b/state.py @@ -2,7 +2,7 @@ import sys, os, errno, glob, stat, sqlite3 import vars from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec -SCHEMA_VER=7 +SCHEMA_VER=1 TIMEOUT=60 _db = None @@ -29,23 +29,24 @@ def db(): if must_create: unlink(dbfile) _db = sqlite3.connect(dbfile, timeout=TIMEOUT) - _db.execute("create table Schema (version int)") + _db.execute("create table Schema " + " (version int)") _db.execute("create table Runid " " (id integer primary key autoincrement)") - _db.execute("create table Files (" - " name not null primary key, " - " is_generated int, " - " checked_runid int, " - " changed_runid int, " - " stamp, csum)") + _db.execute("create table Files " + " (name not null primary key, " + " is_generated int, " + " checked_runid int, " + " changed_runid int, " + " stamp, " + " csum)") _db.execute("create table Deps " - " (target int, source int, mode not null, primary key (target,source))") - #_db.execute("create unique index Files_name on Files (name)") - #_db.execute("create unique index Deps_ix on Deps (target, source)") - _db.execute("create index Deps_src on Deps (source)") + " (target int, " + " source int, " + " mode not null, " + " primary key (target,source))") _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER]) - _db.execute("insert into Runid default values") - _db.execute("insert into Runid default values") + _db.execute("insert into Runid default values") # eat the '0' runid _db.commit() if not vars.RUNID: From fb798515302b0a63403289ba0480e98c1246d481 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 02:13:36 -0800 Subject: [PATCH 07/20] Calculate dependencies with fewer sqlite queries. --- redo-ifchange.py | 5 ++--- state.py | 27 +++++++++++++++++++-------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/redo-ifchange.py b/redo-ifchange.py index dde061e..5267cfb 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -25,13 +25,12 @@ def dirty_deps(f, depth, max_changed): debug('%s-- DIRTY (mtime)\n' % depth) return True - for mode,name in f.deps(): + for mode,f2 in f.deps(): if mode == 'c': - if os.path.exists(name): + if os.path.exists(os.path.join(vars.BASE, f2.name)): debug('%s-- DIRTY (created)\n' % depth) return True elif mode == 'm': - f2 = state.File(name=os.path.join(vars.BASE, name)) if dirty_deps(f2, depth = depth + ' ', max_changed = f.changed_runid): debug('%s-- DIRTY (sub)\n' % depth) diff --git a/state.py b/state.py index 1f6e49b..a449271 100644 --- a/state.py +++ b/state.py @@ -112,8 +112,15 @@ class File(object): __slots__ = ['id', 'name', 'is_generated', 'checked_runid', 'changed_runid', 'stamp', 'csum'] + + def _init_from_cols(self, cols): + (self.id, self.name, self.is_generated, + self.checked_runid, self.changed_runid, + self.stamp, self.csum) = cols - def __init__(self, id=None, name=None): + def __init__(self, id=None, name=None, cols=None): + if cols: + return self._init_from_cols(cols) q = ('select rowid, name, is_generated, checked_runid, changed_runid, ' ' stamp, csum ' ' from Files ') @@ -136,9 +143,7 @@ class File(object): d.commit() row = d.execute(q, l).fetchone() assert(row) - (self.id, self.name, self.is_generated, - self.checked_runid, self.changed_runid, - self.stamp, self.csum) = row + self._init_from_cols(row) def save(self): if not os.path.exists('%s/.redo' % vars.BASE): @@ -180,11 +185,17 @@ class File(object): and self.changed_runid >= self.checked_runid)) def deps(self): - q = "select mode, source from Deps where target=?" - for mode,source_id in db().execute(q, [self.id]).fetchall(): + q = ('select Deps.mode, Deps.source, ' + ' name, is_generated, checked_runid, changed_runid, ' + ' stamp, csum ' + ' from Files ' + ' join Deps on Files.rowid = Deps.source ' + ' where target=?') + for row in db().execute(q, [self.id]).fetchall(): + mode = row[0] + cols = row[1:] assert(mode in ('c', 'm')) - name = File(id=source_id).name - yield mode,name + yield mode,File(cols=cols) def zap_deps(self): debug2('zap-deps: %r\n' % self.name) From 29d6c9a7466061abffc9822fffdd66cfd715d6ef Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 02:44:33 -0800 Subject: [PATCH 08/20] Don't db.commit() so frequently. Just commit when we're about to do something blocking. sqlite goes a lot faster with bigger transactions. This change does show a small percentage speedup in tests, but not as much as I'd like. --- builder.py | 4 ++++ state.py | 53 +++++++++++++++++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/builder.py b/builder.py index c36b13a..b63fdb0 100644 --- a/builder.py +++ b/builder.py @@ -104,6 +104,7 @@ class BuildJob: dof = state.File(name=dofile) dof.set_static() dof.save() + state.commit() jwack.start_job(t, self._do_subproc, self._after) def _do_subproc(self): @@ -184,6 +185,7 @@ class BuildJob: try: self.donefunc(self.t, rv) assert(self.lock.owned) + state.commit() finally: self.lock.unlock() @@ -205,6 +207,7 @@ def main(targets, shouldbuildfunc): # In the first cycle, we just build as much as we can without worrying # about any lock contention. If someone else has it locked, we move on. for t in targets: + state.commit() jwack.get_token(t) if retcode[0] and not vars.KEEP_GOING: break @@ -246,4 +249,5 @@ def main(targets, shouldbuildfunc): lock.unlock() else: BuildJob(t, lock, shouldbuildfunc, done).start() + state.commit() return retcode[0] diff --git a/state.py b/state.py index a449271..661ee3d 100644 --- a/state.py +++ b/state.py @@ -1,6 +1,7 @@ import sys, os, errno, glob, stat, sqlite3 import vars from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec +import helpers SCHEMA_VER=1 TIMEOUT=60 @@ -47,16 +48,15 @@ def db(): " primary key (target,source))") _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER]) _db.execute("insert into Runid default values") # eat the '0' runid - _db.commit() if not vars.RUNID: _db.execute("insert into Runid default values") - _db.commit() vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0] os.environ['REDO_RUNID'] = str(vars.RUNID) _db.execute("pragma journal_mode = PERSIST") _db.execute("pragma synchronous = off") + _db.commit() return _db @@ -70,6 +70,22 @@ def init(): os.unlink(f) +_wrote = 0 +def _write(q, l): + global _wrote + _wrote += 1 + #helpers.log_('W: %r %r\n' % (q,l)) + db().execute(q, l) + + +def commit(): + global _wrote + if _wrote: + #helpers.log_("COMMIT (%d)\n" % _wrote) + db().commit() + _wrote = 0 + + _insane = None def is_sane(): global _insane @@ -139,8 +155,7 @@ class File(object): if not name: raise Exception('File with id=%r not found and ' 'name not given' % id) - d.execute('insert into Files (name) values (?)', [name]) - d.commit() + _write('insert into Files (name) values (?)', [name]) row = d.execute(q, l).fetchone() assert(row) self._init_from_cols(row) @@ -149,15 +164,14 @@ class File(object): if not os.path.exists('%s/.redo' % vars.BASE): # this might happen if 'make clean' removes the .redo dir return - d = db() - d.execute('update Files set ' - ' is_generated=?, checked_runid=?, changed_runid=?, ' - ' stamp=?, csum=? ' - ' where rowid=?', - [self.is_generated, self.checked_runid, self.changed_runid, - self.stamp, self.csum, - self.id]) - d.commit() + _write('update Files set ' + ' is_generated=?, checked_runid=?, changed_runid=?, ' + ' stamp=?, csum=? ' + ' where rowid=?', + [self.is_generated, + self.checked_runid, self.changed_runid, + self.stamp, self.csum, + self.id]) def set_checked(self): self.checked_runid = vars.RUNID @@ -199,21 +213,16 @@ class File(object): def zap_deps(self): debug2('zap-deps: %r\n' % self.name) - db().execute('delete from Deps where target=?', [self.id]) - db().commit() + _write('delete from Deps where target=?', [self.id]) def add_dep(self, mode, dep): src = File(name=dep) reldep = relpath(dep, vars.BASE) debug2('add-dep: %r < %s %r\n' % (self.name, mode, reldep)) assert(src.name == reldep) - d = db() - d.execute("delete from Deps where target=? and source=?", - [self.id, src.id]) - d.execute("insert into Deps " - " (target, mode, source) values (?,?,?)", - [self.id, mode, src.id]) - d.commit() + _write("insert or replace into Deps " + " (target, mode, source) values (?,?,?)", + [self.id, mode, src.id]) def read_stamp(self): try: From 3ef2bd73001ff562f7b197ca8b44bb3cc4392329 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 03:01:26 -0800 Subject: [PATCH 09/20] Don't check as often whether the .redo directory exists. Just check it once after running a subprocess: that's the only way it ought to be able to disappear (ie. in a 'make clean' setup). --- builder.py | 7 +++++-- helpers.py | 18 ------------------ redo-ifchange.py | 2 +- redo-ifcreate.py | 2 +- state.py | 23 ++++++++++++++--------- 5 files changed, 21 insertions(+), 31 deletions(-) diff --git a/builder.py b/builder.py index b63fdb0..7b083f9 100644 --- a/builder.py +++ b/builder.py @@ -130,6 +130,7 @@ class BuildJob: def _after(self, t, rv): try: + state.check_sane() rv = self._after1(t, rv) finally: self._after2(rv) @@ -211,7 +212,8 @@ def main(targets, shouldbuildfunc): jwack.get_token(t) if retcode[0] and not vars.KEEP_GOING: break - if not state.is_sane(): + if not state.check_sane(): + err('.redo directory disappeared; cannot continue.\n') retcode[0] = 205 break lock = state.Lock(t) @@ -234,7 +236,8 @@ def main(targets, shouldbuildfunc): if retcode[0] and not vars.KEEP_GOING: break if locked: - if not state.is_sane(): + if not state.check_sane(): + err('.redo directory disappeared; cannot continue.\n') retcode[0] = 205 break t = locked.pop(0) diff --git a/helpers.py b/helpers.py index 3765343..ef5f011 100644 --- a/helpers.py +++ b/helpers.py @@ -15,24 +15,6 @@ def unlink(f): pass # it doesn't exist, that's what you asked for -def mkdirp(d, mode=None): - """Recursively create directories on path 'd'. - - Unlike os.makedirs(), it doesn't raise an exception if the last element of - the path already exists. - """ - try: - if mode: - os.makedirs(d, mode) - else: - os.makedirs(d) - except OSError, e: - if e.errno == errno.EEXIST: - pass - else: - raise - - def log_(s): sys.stdout.flush() if vars.DEBUG_PIDS: diff --git a/redo-ifchange.py b/redo-ifchange.py index 5267cfb..2a433cc 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -1,7 +1,7 @@ #!/usr/bin/python import sys, os, errno, stat import vars, state, builder, jwack -from helpers import debug, debug2, err, mkdirp, unlink +from helpers import debug, debug2, err, unlink def dirty_deps(f, depth, max_changed): diff --git a/redo-ifcreate.py b/redo-ifcreate.py index 9d862c9..d4c190c 100755 --- a/redo-ifcreate.py +++ b/redo-ifcreate.py @@ -1,7 +1,7 @@ #!/usr/bin/python import sys, os import vars, state -from helpers import err, mkdirp +from helpers import err try: diff --git a/state.py b/state.py index 661ee3d..beac06e 100644 --- a/state.py +++ b/state.py @@ -1,6 +1,6 @@ import sys, os, errno, glob, stat, sqlite3 import vars -from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec +from helpers import unlink, err, debug2, debug3, close_on_exec import helpers SCHEMA_VER=1 @@ -13,7 +13,13 @@ def db(): return _db dbdir = '%s/.redo' % vars.BASE dbfile = '%s/db.sqlite3' % dbdir - mkdirp(dbdir) + try: + os.mkdir(dbdir) + except OSError, e: + if e.errno == errno.EEXIST: + pass # if it exists, that's okay + else: + raise must_create = not os.path.exists(dbfile) if not must_create: _db = sqlite3.connect(dbfile, timeout=TIMEOUT) @@ -72,6 +78,8 @@ def init(): _wrote = 0 def _write(q, l): + if _insane: + return global _wrote _wrote += 1 #helpers.log_('W: %r %r\n' % (q,l)) @@ -79,6 +87,8 @@ def _write(q, l): def commit(): + if _insane: + return global _wrote if _wrote: #helpers.log_("COMMIT (%d)\n" % _wrote) @@ -87,12 +97,10 @@ def commit(): _insane = None -def is_sane(): - global _insane +def check_sane(): + global _insane, _writable if not _insane: _insane = not os.path.exists('%s/.redo' % vars.BASE) - if _insane: - err('.redo directory disappeared; cannot continue.\n') return not _insane @@ -161,9 +169,6 @@ class File(object): self._init_from_cols(row) def save(self): - if not os.path.exists('%s/.redo' % vars.BASE): - # this might happen if 'make clean' removes the .redo dir - return _write('update Files set ' ' is_generated=?, checked_runid=?, changed_runid=?, ' ' stamp=?, csum=? ' From 94cecc240b03771ce318492e9d30e7a1249397b6 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 03:33:53 -0800 Subject: [PATCH 10/20] Don't abort if 'insert into Files' gives an IntegrityError. It can happen occasionally if some other parallel redo adds the same file at the same time. --- state.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/state.py b/state.py index beac06e..df41a5b 100644 --- a/state.py +++ b/state.py @@ -163,7 +163,12 @@ class File(object): if not name: raise Exception('File with id=%r not found and ' 'name not given' % id) - _write('insert into Files (name) values (?)', [name]) + try: + _write('insert into Files (name) values (?)', [name]) + except sqlite3.IntegrityError: + # some parallel redo probably added it at the same time; no + # big deal. + pass row = d.execute(q, l).fetchone() assert(row) self._init_from_cols(row) From e1a0fc9c12b9d9a0efb622f3ee83be014dbaa009 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 03:43:08 -0800 Subject: [PATCH 11/20] state.File.is_checked() was being too paranoid. It wasn't allowing us to short circuit a dependency if that dependency had been built previously, but that was already being checked (more correctly) in dirty_deps(). --- state.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/state.py b/state.py index df41a5b..887649d 100644 --- a/state.py +++ b/state.py @@ -204,9 +204,7 @@ class File(object): return self.changed_runid and self.changed_runid >= vars.RUNID def is_checked(self): - return (self.checked_runid and self.checked_runid >= vars.RUNID - and not (self.changed_runid - and self.changed_runid >= self.checked_runid)) + return self.checked_runid and self.checked_runid >= vars.RUNID def deps(self): q = ('select Deps.mode, Deps.source, ' From e446d4dd04ba8edc91714b24254bd4d50bcd04ee Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 04:54:40 -0800 Subject: [PATCH 12/20] builder.py: don't import the 'random' module unless we need it. Initializing the random number generator involves some pointless reading from /dev/urandom. --- builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builder.py b/builder.py index 7b083f9..10094d9 100644 --- a/builder.py +++ b/builder.py @@ -1,4 +1,4 @@ -import sys, os, random, errno, stat +import sys, os, errno, stat import vars, jwack, state from helpers import log, log_, debug2, err, unlink, close_on_exec @@ -194,6 +194,7 @@ class BuildJob: def main(targets, shouldbuildfunc): retcode = [0] # a list so that it can be reassigned from done() if vars.SHUFFLE: + import random random.shuffle(targets) locked = [] From b5c02e410e3c6290a302c951454249d0d492ad37 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 04:58:05 -0800 Subject: [PATCH 13/20] state.py: reorder things so sqlite never does fdatasync(). It was briefly synchronous at data creation time, adding a few ms to redo startup. --- state.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/state.py b/state.py index 887649d..e75a5b0 100644 --- a/state.py +++ b/state.py @@ -6,6 +6,13 @@ import helpers SCHEMA_VER=1 TIMEOUT=60 +def _connect(dbfile): + _db = sqlite3.connect(dbfile, timeout=TIMEOUT) + _db.execute("pragma synchronous = off") + _db.execute("pragma journal_mode = PERSIST") + return _db + + _db = None def db(): global _db @@ -22,7 +29,7 @@ def db(): raise must_create = not os.path.exists(dbfile) if not must_create: - _db = sqlite3.connect(dbfile, timeout=TIMEOUT) + _db = _connect(dbfile) try: row = _db.cursor().execute("select version from Schema").fetchone() except sqlite3.OperationalError: @@ -35,7 +42,7 @@ def db(): _db = None if must_create: unlink(dbfile) - _db = sqlite3.connect(dbfile, timeout=TIMEOUT) + _db = _connect(dbfile) _db.execute("create table Schema " " (version int)") _db.execute("create table Runid " @@ -60,8 +67,6 @@ def db(): vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0] os.environ['REDO_RUNID'] = str(vars.RUNID) - _db.execute("pragma journal_mode = PERSIST") - _db.execute("pragma synchronous = off") _db.commit() return _db From 6e6e4539088fc153724361b4259c9efcfdcd14f8 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 05:53:30 -0800 Subject: [PATCH 14/20] Some speedups for doing redo-ifchange on a large number of static files. Fix some wastage revealed by the (almost useless, sigh) python profiler. --- builder.py | 19 +++++++++++-------- jwack.py | 5 +++++ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/builder.py b/builder.py index 10094d9..8896ea3 100644 --- a/builder.py +++ b/builder.py @@ -4,6 +4,7 @@ from helpers import log, log_, debug2, err, unlink, close_on_exec def _possible_do_files(t): + t = os.path.join(vars.BASE, t) yield "%s.do" % t, t, '' dirname,filename = os.path.split(t) l = filename.split('.') @@ -16,14 +17,14 @@ def _possible_do_files(t): os.path.join(dirname, basename), ext) -def _find_do_file(t): - for dofile,basename,ext in _possible_do_files(t): - debug2('%s: %s ?\n' % (t, dofile)) +def _find_do_file(f): + for dofile,basename,ext in _possible_do_files(f.name): + debug2('%s: %s ?\n' % (f.name, dofile)) if os.path.exists(dofile): - state.File(name=t).add_dep('m', dofile) + f.add_dep('m', dofile) return dofile,basename,ext else: - state.File(name=t).add_dep('c', dofile) + f.add_dep('c', dofile) return None,None,None @@ -73,7 +74,7 @@ class BuildJob: f.save() return self._after2(0) f.zap_deps() - (dofile, basename, ext) = _find_do_file(t) + (dofile, basename, ext) = _find_do_file(f) if not dofile: if os.path.exists(t): f.is_generated = False @@ -132,6 +133,7 @@ class BuildJob: try: state.check_sane() rv = self._after1(t, rv) + state.commit() finally: self._after2(rv) @@ -186,7 +188,6 @@ class BuildJob: try: self.donefunc(self.t, rv) assert(self.lock.owned) - state.commit() finally: self.lock.unlock() @@ -209,7 +210,8 @@ def main(targets, shouldbuildfunc): # In the first cycle, we just build as much as we can without worrying # about any lock contention. If someone else has it locked, we move on. for t in targets: - state.commit() + if not jwack.has_token(): + state.commit() jwack.get_token(t) if retcode[0] and not vars.KEEP_GOING: break @@ -231,6 +233,7 @@ def main(targets, shouldbuildfunc): # use select.select() to wait on more than one at a time. But it should # be rare enough that it doesn't matter, and the logic is easier this way. while locked or jwack.running(): + state.commit() jwack.wait_all() # at this point, we don't have any children holding any tokens, so # it's okay to block below. diff --git a/jwack.py b/jwack.py index 885c376..e8bef68 100644 --- a/jwack.py +++ b/jwack.py @@ -105,6 +105,11 @@ def wait(want_token): pd.donefunc(pd.name, pd.rv) +def has_token(): + if _mytokens >= 1: + return True + + def get_token(reason): global _mytokens assert(_mytokens <= 1) From 10afd9000f07a648d41b65267a17b7c32823730b Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 9 Dec 2010 06:44:47 -0800 Subject: [PATCH 15/20] Add some conditionals around some high-bandwidth debug statements. When you have lots of unmodified dependencies, building these printout strings (which aren't even printed unless you're using -d) ends up taking something like 5% of the runtime. --- redo-ifchange.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/redo-ifchange.py b/redo-ifchange.py index 2a433cc..cd706b0 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -5,7 +5,7 @@ from helpers import debug, debug2, err, unlink def dirty_deps(f, depth, max_changed): - debug('%s?%s\n' % (depth, f.name)) + if vars.DEBUG >= 1: debug('%s?%s\n' % (depth, f.name)) if f.changed_runid == None: debug('%s-- DIRTY (never built)\n' % depth) @@ -14,7 +14,7 @@ def dirty_deps(f, depth, max_changed): debug('%s-- DIRTY (built)\n' % depth) return True # has been built more recently than parent if f.is_checked(): - debug('%s-- CLEAN (checked)\n' % depth) + if vars.DEBUG >= 1: debug('%s-- CLEAN (checked)\n' % depth) return False # has already been checked during this session if not f.stamp: From 84169c5d27cef1cdf17e59f1fee9e3e56747cb04 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 02:58:13 -0800 Subject: [PATCH 16/20] Change locking stuff from fifos to fcntl.lockf(). This should reduce filesystem grinding a bit, and makes the code simpler. It's also theoretically a bit more portable, since I'm guessing fifo semantics aren't the same on win32 if we ever get there. Also, a major problem with the old fifo-based system is that if a redo process died without cleaning up after itself, it wouldn't delete its lockfiles, so we had to wipe them all at the beginning of each build. Now we don't; in theory, you can now have multiple copies of redo poking at the same tree at the same time and not stepping on each other. --- builder.py | 49 ++++++++++++++++-------------- state.py | 88 ++++++++++++++++++++---------------------------------- 2 files changed, 58 insertions(+), 79 deletions(-) diff --git a/builder.py b/builder.py index 8896ea3..16695cf 100644 --- a/builder.py +++ b/builder.py @@ -43,8 +43,9 @@ def _try_stat(filename): class BuildJob: - def __init__(self, t, lock, shouldbuildfunc, donefunc): - self.t = t + def __init__(self, t, sf, lock, shouldbuildfunc, donefunc): + self.t = t # original target name, not relative to vars.BASE + self.sf = sf self.tmpname = '%s.redo.tmp' % t self.lock = lock self.shouldbuildfunc = shouldbuildfunc @@ -54,13 +55,13 @@ class BuildJob: def start(self): assert(self.lock.owned) t = self.t - f = state.File(name=t) + sf = self.sf tmpname = self.tmpname if not self.shouldbuildfunc(t): # target doesn't need to be built; skip the whole task return self._after2(0) if (os.path.exists(t) and not os.path.exists(t + '/.') - and not f.is_generated): + and not sf.is_generated): # an existing source file that was not generated by us. # This step is mentioned by djb in his notes. # For example, a rule called default.c.do could be used to try @@ -70,16 +71,16 @@ class BuildJob: # of redo? That would make it easy for someone to override a # file temporarily, and could be undone by deleting the file. debug2("-- static (%r)\n" % t) - f.set_static() - f.save() + sf.set_static() + sf.save() return self._after2(0) - f.zap_deps() - (dofile, basename, ext) = _find_do_file(f) + sf.zap_deps() + (dofile, basename, ext) = _find_do_file(sf) if not dofile: if os.path.exists(t): - f.is_generated = False - f.set_static() - f.save() + sf.is_generated = False + sf.set_static() + sf.save() return self._after2(0) else: err('no rule to make %r\n' % t) @@ -100,8 +101,8 @@ class BuildJob: if vars.VERBOSE or vars.XTRACE: log_('\n') log('%s\n' % _nice(t)) self.argv = argv - f.is_generated = True - f.save() + sf.is_generated = True + sf.save() dof = state.File(name=dofile) dof.set_static() dof.save() @@ -165,14 +166,14 @@ class BuildJob: os.rename(tmpname, t) else: unlink(tmpname) - sf = state.File(name=t) + sf = self.sf sf.is_generated=True sf.update_stamp() sf.set_changed() sf.save() else: unlink(tmpname) - sf = state.File(name=t) + sf = self.sf sf.stamp = None sf.set_changed() sf.save() @@ -219,18 +220,19 @@ def main(targets, shouldbuildfunc): err('.redo directory disappeared; cannot continue.\n') retcode[0] = 205 break - lock = state.Lock(t) + f = state.File(name=t) + lock = state.Lock(f.id) lock.trylock() if not lock.owned: if vars.DEBUG_LOCKS: log('%s (locked...)\n' % _nice(t)) - locked.append(t) + locked.append((f.id,t)) else: - BuildJob(t, lock, shouldbuildfunc, done).start() + BuildJob(t, f, lock, shouldbuildfunc, done).start() # Now we've built all the "easy" ones. Go back and just wait on the - # remaining ones one by one. This is technically non-optimal; we could - # use select.select() to wait on more than one at a time. But it should + # remaining ones one by one. This is non-optimal; we could go faster if + # we could wait on multiple locks at once. But it should # be rare enough that it doesn't matter, and the logic is easier this way. while locked or jwack.running(): state.commit() @@ -244,8 +246,8 @@ def main(targets, shouldbuildfunc): err('.redo directory disappeared; cannot continue.\n') retcode[0] = 205 break - t = locked.pop(0) - lock = state.Lock(t) + fid,t = locked.pop(0) + lock = state.Lock(fid) lock.waitlock() assert(lock.owned) if vars.DEBUG_LOCKS: @@ -255,6 +257,7 @@ def main(targets, shouldbuildfunc): retcode[0] = 2 lock.unlock() else: - BuildJob(t, lock, shouldbuildfunc, done).start() + BuildJob(t, state.File(id=fid), lock, + shouldbuildfunc, done).start() state.commit() return retcode[0] diff --git a/state.py b/state.py index e75a5b0..dc3a7c5 100644 --- a/state.py +++ b/state.py @@ -1,4 +1,4 @@ -import sys, os, errno, glob, stat, sqlite3 +import sys, os, errno, glob, stat, fcntl, sqlite3 import vars from helpers import unlink, err, debug2, debug3, close_on_exec import helpers @@ -14,10 +14,12 @@ def _connect(dbfile): _db = None +_lockfile = None def db(): - global _db + global _db, _lockfile if _db: return _db + dbdir = '%s/.redo' % vars.BASE dbfile = '%s/db.sqlite3' % dbdir try: @@ -27,6 +29,11 @@ def db(): pass # if it exists, that's okay else: raise + + _lockfile = os.open(os.path.join(vars.BASE, '.redo/locks'), + os.O_RDWR | os.O_CREAT, 0666) + close_on_exec(_lockfile, True) + must_create = not os.path.exists(dbfile) if not must_create: _db = _connect(dbfile) @@ -60,7 +67,9 @@ def db(): " mode not null, " " primary key (target,source))") _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER]) - _db.execute("insert into Runid default values") # eat the '0' runid + # eat the '0' runid and File id + _db.execute("insert into Runid default values") + _db.execute("insert into Files (name) values (?)", ['']) if not vars.RUNID: _db.execute("insert into Runid default values") @@ -72,13 +81,7 @@ def db(): def init(): - # FIXME: just wiping out all the locks is kind of cheating. But we - # only do this from the toplevel redo process, so unless the user - # deliberately starts more than one redo on the same repository, it's - # sort of ok. db() - for f in glob.glob('%s/.redo/lock*' % vars.BASE): - os.unlink(f) _wrote = 0 @@ -128,16 +131,8 @@ def relpath(t, base): return '/'.join(tparts) -def xx_sname(typ, t): - # FIXME: t.replace(...) is non-reversible and non-unique here! - tnew = relpath(t, vars.BASE) - v = vars.BASE + ('/.redo/%s^%s' % (typ, tnew.replace('/', '^'))) - if vars.DEBUG >= 3: - debug3('sname: (%r) %r -> %r\n' % (os.getcwd(), t, tnew)) - return v - - class File(object): + # use this mostly to avoid accidentally assigning to typos __slots__ = ['id', 'name', 'is_generated', 'checked_runid', 'changed_runid', 'stamp', 'csum'] @@ -247,62 +242,43 @@ class File(object): else: # a "unique identifier" stamp for a regular file return str((st.st_ctime, st.st_mtime, st.st_size, st.st_ino)) - +# FIXME: I really want to use fcntl F_SETLK, F_SETLKW, etc here. But python +# doesn't do the lockdata structure in a portable way, so we have to use +# fcntl.lockf() instead. Usually this is just a wrapper for fcntl, so it's +# ok, but it doesn't have F_GETLK, so we can't report which pid owns the lock. +# The makes debugging a bit harder. When we someday port to C, we can do that. class Lock: - def __init__(self, t): + def __init__(self, fid): + assert(_lockfile >= 0) self.owned = False - self.rfd = self.wfd = None - self.lockname = xx_sname('lock', t) + self.fid = fid def __del__(self): if self.owned: self.unlock() def trylock(self): + assert(not self.owned) try: - os.mkfifo(self.lockname, 0600) - self.owned = True - self.rfd = os.open(self.lockname, os.O_RDONLY|os.O_NONBLOCK) - self.wfd = os.open(self.lockname, os.O_WRONLY) - close_on_exec(self.rfd, True) - close_on_exec(self.wfd, True) - except OSError, e: - if e.errno == errno.EEXIST: - pass + fcntl.lockf(_lockfile, fcntl.LOCK_EX|fcntl.LOCK_NB, 1, self.fid) + except IOError, e: + if e.errno in (errno.EAGAIN, errno.EACCES): + pass # someone else has it locked else: raise + else: + self.owned = True def waitlock(self): - while not self.owned: - self.wait() - self.trylock() - assert(self.owned) + assert(not self.owned) + fcntl.lockf(_lockfile, fcntl.LOCK_EX, 1, self.fid) + self.owned = True def unlock(self): if not self.owned: raise Exception("can't unlock %r - we don't own it" % self.lockname) - unlink(self.lockname) - # ping any connected readers - os.close(self.rfd) - os.close(self.wfd) - self.rfd = self.wfd = None + fcntl.lockf(_lockfile, fcntl.LOCK_UN, 1, self.fid) self.owned = False - - def wait(self): - if self.owned: - raise Exception("can't wait on %r - we own it" % self.lockname) - try: - # open() will finish only when a writer exists and does close() - fd = os.open(self.lockname, os.O_RDONLY) - try: - os.read(fd, 1) - finally: - os.close(fd) - except OSError, e: - if e.errno == errno.ENOENT: - pass # it's not even unlocked or was unlocked earlier - else: - raise From 675a5106d2588b9e6ec7dde0d114ec486f0e8d8f Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 04:11:44 -0800 Subject: [PATCH 17/20] dup() the jobserver fds to 100,101 to make debugging a bit easier. Now if a process is stuck waiting on one of those fds, it'll be obvious from the strace. --- jwack.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/jwack.py b/jwack.py index e8bef68..2876094 100644 --- a/jwack.py +++ b/jwack.py @@ -70,7 +70,11 @@ def setup(maxjobs): if maxjobs and not _fds: # need to start a new server _toplevel = maxjobs - _fds = os.pipe() + _fds1 = os.pipe() + _fds = (fcntl.fcntl(_fds1[0], fcntl.F_DUPFD, 100), + fcntl.fcntl(_fds1[1], fcntl.F_DUPFD, 101)) + os.close(_fds1[0]) + os.close(_fds1[1]) _release(maxjobs-1) os.putenv('MAKEFLAGS', '%s --jobserver-fds=%d,%d -j' % (os.getenv('MAKEFLAGS'), From f70c028a8a38605d5f859d8549cbe6a6e95c2939 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 04:31:22 -0800 Subject: [PATCH 18/20] With --debug-locks, print a message when we stop to wait on a lock. Helps in seeing why a particular process might be stopped, and in detecting potential reasons that parallelism might be reduced. --- builder.py | 17 ++++++++++++----- helpers.py | 7 +++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/builder.py b/builder.py index 16695cf..d5b8216 100644 --- a/builder.py +++ b/builder.py @@ -1,6 +1,6 @@ import sys, os, errno, stat import vars, jwack, state -from helpers import log, log_, debug2, err, unlink, close_on_exec +from helpers import log, log_, debug2, err, warn, unlink, close_on_exec def _possible_do_files(t): @@ -231,9 +231,12 @@ def main(targets, shouldbuildfunc): BuildJob(t, f, lock, shouldbuildfunc, done).start() # Now we've built all the "easy" ones. Go back and just wait on the - # remaining ones one by one. This is non-optimal; we could go faster if - # we could wait on multiple locks at once. But it should - # be rare enough that it doesn't matter, and the logic is easier this way. + # remaining ones one by one. There's no reason to do it any more + # efficiently, because if these targets were previously locked, that + # means someone else was building them; thus, we probably won't need to + # do anything. The only exception is if we're invoked as redo instead + # of redo-ifchange; then we have to redo it even if someone else already + # did. But that should be rare. while locked or jwack.running(): state.commit() jwack.wait_all() @@ -248,7 +251,11 @@ def main(targets, shouldbuildfunc): break fid,t = locked.pop(0) lock = state.Lock(fid) - lock.waitlock() + lock.trylock() + if not lock.owned: + if vars.DEBUG_LOCKS and len(locked) >= 1: + warn('%s (WAITING)\n' % _nice(t)) + lock.waitlock() assert(lock.owned) if vars.DEBUG_LOCKS: log('%s (...unlocked!)\n' % _nice(t)) diff --git a/helpers.py b/helpers.py index ef5f011..169d39c 100644 --- a/helpers.py +++ b/helpers.py @@ -34,13 +34,20 @@ def _cerr(s): def _bwerr(s): log_('redo: %s%s' % (vars.DEPTH, s)) +def _cwarn(s): + log_('\x1b[33mredo: %s\x1b[1m%s\x1b[m' % (vars.DEPTH, s)) +def _bwwarn(s): + log_('redo: %s%s' % (vars.DEPTH, s)) + if os.isatty(2): log = _clog err = _cerr + warn = _cwarn else: log = _bwlog err = _bwerr + warn = _bwwarn def debug(s): From 49ebea445fd0ac82ec4e0c9e4c968b202807b02b Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 04:55:13 -0800 Subject: [PATCH 19/20] jwack: don't ever set the jobserver socket to O_NONBLOCK. It creates a race condition: GNU Make might try to read while the socket is O_NONBLOCK, get EAGAIN, and die; or else another redo might set it back to blocking in between our call to make it O_NONBLOCK and our call to read(). This method - setting an alarm() during the read - is hacky, but should work every time. Unfortunately you get a 1s delay - rarely - when this happens. The good news is it only happens when there are no tokens available anyhow, so it won't affect performance much in any situation I can imagine. --- jwack.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/jwack.py b/jwack.py index 2876094..73990d0 100644 --- a/jwack.py +++ b/jwack.py @@ -1,7 +1,7 @@ # # beware the jobberwack # -import sys, os, errno, select, fcntl +import sys, os, errno, select, fcntl, signal import atoi _toplevel = 0 @@ -24,22 +24,35 @@ def _release(n): _mytokens = 1 +def _timeout(sig, frame): + pass + + def _try_read(fd, n): - # FIXME: this isn't actually safe, because GNU make can't handle it if - # the socket is nonblocking. Ugh. That means we'll have to do their - # horrible SIGCHLD hack after all. - fcntl.fcntl(_fds[0], fcntl.F_SETFL, os.O_NONBLOCK) + # using djb's suggested way of doing non-blocking reads from a blocking + # socket: http://cr.yp.to/unix/nonblock.html + # We can't just make the socket non-blocking, because we want to be + # compatible with GNU Make, and they can't handle it. + r,w,x = select.select([fd], [], [], 0) + if not r: + return '' # try again + # ok, the socket is readable - but some other process might get there + # first. We have to set an alarm() in case our read() gets stuck. + oldh = signal.signal(signal.SIGALRM, _timeout) try: + signal.alarm(1) # emergency fallback try: b = os.read(_fds[0], 1) except OSError, e: - if e.errno == errno.EAGAIN: - return '' + if e.errno in (errno.EAGAIN, errno.EINTR): + # interrupted or it was nonblocking + return '' # try again else: raise finally: - fcntl.fcntl(_fds[0], fcntl.F_SETFL, 0) - return b and b or None + signal.alarm(0) + signal.signal(signal.SIGALRM, oldh) + return b and b or None # None means EOF def setup(maxjobs): From 18b5263db7c102bf527e5f287867ac56b4acce09 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Fri, 10 Dec 2010 05:19:49 -0800 Subject: [PATCH 20/20] jwack: fix a typo in the "wrong number of tokens on exit" error. Not that we ever see that error, except when I'm screwing around. --- jwack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jwack.py b/jwack.py index 73990d0..797b3bd 100644 --- a/jwack.py +++ b/jwack.py @@ -171,8 +171,8 @@ def wait_all(): bb += b if not b: break if len(bb) != _toplevel-1: - raise Exception('on exit: expected %d tokens; found only %d' - % (_toplevel-1, len(b))) + raise Exception('on exit: expected %d tokens; found only %r' + % (_toplevel-1, len(bb))) os.write(_fds[1], bb)