diff --git a/builder.py b/builder.py index eeeccbb..c36b13a 100644 --- a/builder.py +++ b/builder.py @@ -20,10 +20,10 @@ def _find_do_file(t): for dofile,basename,ext in _possible_do_files(t): debug2('%s: %s ?\n' % (t, dofile)) if os.path.exists(dofile): - state.add_dep(t, 'm', dofile) + state.File(name=t).add_dep('m', dofile) return dofile,basename,ext else: - state.add_dep(t, 'c', dofile) + state.File(name=t).add_dep('c', dofile) return None,None,None @@ -53,12 +53,13 @@ class BuildJob: def start(self): assert(self.lock.owned) t = self.t + f = state.File(name=t) tmpname = self.tmpname if not self.shouldbuildfunc(t): # target doesn't need to be built; skip the whole task return self._after2(0) if (os.path.exists(t) and not os.path.exists(t + '/.') - and not state.is_generated(t)): + and not f.is_generated): # an existing source file that was not generated by us. # This step is mentioned by djb in his notes. # For example, a rule called default.c.do could be used to try @@ -67,20 +68,21 @@ class BuildJob: # FIXME: always refuse to redo any file that was modified outside # of redo? That would make it easy for someone to override a # file temporarily, and could be undone by deleting the file. - state.unmark_as_generated(t) - state.stamp_and_maybe_built(t) + debug2("-- static (%r)\n" % t) + f.set_static() + f.save() return self._after2(0) - state.start(t) + f.zap_deps() (dofile, basename, ext) = _find_do_file(t) if not dofile: if os.path.exists(t): - state.unmark_as_generated(t) - state.stamp_and_maybe_built(t) + f.is_generated = False + f.set_static() + f.save() return self._after2(0) else: err('no rule to make %r\n' % t) return self._after2(1) - state.stamp_and_maybe_built(dofile) unlink(tmpname) ffd = os.open(tmpname, os.O_CREAT|os.O_RDWR|os.O_EXCL, 0666) close_on_exec(ffd, True) @@ -97,13 +99,19 @@ class BuildJob: if vars.VERBOSE or vars.XTRACE: log_('\n') log('%s\n' % _nice(t)) self.argv = argv + f.is_generated = True + f.save() + dof = state.File(name=dofile) + dof.set_static() + dof.save() jwack.start_job(t, self._do_subproc, self._after) def _do_subproc(self): # careful: REDO_PWD was the PWD relative to the STARTPATH at the time # we *started* building the current target; but that target ran # redo-ifchange, and it might have done it from a different directory - # than we started it in. So os.getcwd() might be != REDO_PWD right now. + # than we started it in. So os.getcwd() might be != REDO_PWD right + # now. dn = os.path.dirname(self.t) newp = os.path.realpath(dn) os.environ['REDO_PWD'] = state.relpath(newp, vars.STARTDIR) @@ -153,11 +161,17 @@ class BuildJob: os.rename(tmpname, t) else: unlink(tmpname) - state.built(t) - state.stamp(t) + sf = state.File(name=t) + sf.is_generated=True + sf.update_stamp() + sf.set_changed() + sf.save() else: unlink(tmpname) - state.unstamp(t) + sf = state.File(name=t) + sf.stamp = None + sf.set_changed() + sf.save() f.close() if rv != 0: err('%s: exit code %d\n' % (_nice(t),rv)) @@ -226,7 +240,7 @@ def main(targets, shouldbuildfunc): assert(lock.owned) if vars.DEBUG_LOCKS: log('%s (...unlocked!)\n' % _nice(t)) - if state.stamped(t) == None: + if state.File(name=t).stamp == None: err('%s: failed in another thread\n' % _nice(t)) retcode[0] = 2 lock.unlock() diff --git a/redo-ifchange.py b/redo-ifchange.py index 7182b6d..dde061e 100755 --- a/redo-ifchange.py +++ b/redo-ifchange.py @@ -4,58 +4,59 @@ import vars, state, builder, jwack from helpers import debug, debug2, err, mkdirp, unlink -def dirty_deps(t, depth): - try: - st = os.stat(t) - realtime = st.st_mtime - except OSError: - st = None - realtime = 0 - - debug('%s?%s\n' % (depth, t)) - if state.isbuilt(t): +def dirty_deps(f, depth, max_changed): + debug('%s?%s\n' % (depth, f.name)) + + if f.changed_runid == None: + debug('%s-- DIRTY (never built)\n' % depth) + return True + if f.changed_runid > max_changed: debug('%s-- DIRTY (built)\n' % depth) - return True # has already been built during this session - if state.ismarked(t): - debug('%s-- CLEAN (marked)\n' % depth) + return True # has been built more recently than parent + if f.is_checked(): + debug('%s-- CLEAN (checked)\n' % depth) return False # has already been checked during this session - - stamptime = state.stamped(t) - if stamptime == None: + + if not f.stamp: debug('%s-- DIRTY (no stamp)\n' % depth) return True - if stamptime != realtime and not (st and stat.S_ISDIR(st.st_mode)): + if f.stamp != f.read_stamp(): debug('%s-- DIRTY (mtime)\n' % depth) return True - for mode,name in state.deps(t): + for mode,name in f.deps(): if mode == 'c': if os.path.exists(name): debug('%s-- DIRTY (created)\n' % depth) return True elif mode == 'm': - if dirty_deps(os.path.join(vars.BASE, name), depth + ' '): + f2 = state.File(name=os.path.join(vars.BASE, name)) + if dirty_deps(f2, depth = depth + ' ', + max_changed = f.changed_runid): debug('%s-- DIRTY (sub)\n' % depth) - state.unstamp(t) # optimization for future callers return True - state.mark(t) + f.set_checked() + f.save() return False def should_build(t): - return not state.isbuilt(t) and dirty_deps(t, depth = '') + f = state.File(name=t) + return dirty_deps(f, depth = '', max_changed = vars.RUNID) rv = 202 try: me = os.path.join(vars.STARTDIR, os.path.join(vars.PWD, vars.TARGET)) + f = state.File(name=me) debug2('TARGET: %r %r %r\n' % (vars.STARTDIR, vars.PWD, vars.TARGET)) try: targets = sys.argv[1:] for t in targets: - state.add_dep(me, 'm', t) + f.add_dep('m', t) + f.save() rv = builder.main(targets, should_build) finally: jwack.force_return_tokens() diff --git a/redo-ifcreate.py b/redo-ifcreate.py index 2794888..9d862c9 100755 --- a/redo-ifcreate.py +++ b/redo-ifcreate.py @@ -5,11 +5,12 @@ from helpers import err, mkdirp try: + me = state.File(name=vars.TARGET) for t in sys.argv[1:]: if os.path.exists(t): err('redo-ifcreate: error: %r already exists\n' % t) sys.exit(1) else: - state.add_dep(vars.TARGET, 'c', t) + me.add_dep('c', t) except KeyboardInterrupt: sys.exit(200) diff --git a/state.py b/state.py index 2986c15..17a9e35 100644 --- a/state.py +++ b/state.py @@ -1,20 +1,71 @@ -import sys, os, errno, glob +import sys, os, errno, glob, stat, sqlite3 import vars from helpers import unlink, err, debug2, debug3, mkdirp, close_on_exec +SCHEMA_VER=7 + +_db = None +def db(): + global _db + if _db: + return _db + dbdir = '%s/.redo' % vars.BASE + dbfile = '%s/db.sqlite3' % dbdir + mkdirp(dbdir) + must_create = not os.path.exists(dbfile) + if not must_create: + _db = sqlite3.connect(dbfile) + try: + row = _db.cursor().execute("select version from Schema").fetchone() + except sqlite3.OperationalError: + row = None + ver = row and row[0] or None + if ver != SCHEMA_VER: + err("state database: discarding v%s (wanted v%s)\n" + % (ver, SCHEMA_VER)) + must_create = True + _db = None + if must_create: + unlink(dbfile) + _db = sqlite3.connect(dbfile) + _db.execute("create table Schema (version int)") + _db.execute("create table Runid " + " (id integer primary key autoincrement)") + _db.execute("create table Files (" + " name not null primary key, " + " is_generated int, " + " checked_runid int, " + " changed_runid int, " + " stamp, csum)") + _db.execute("create table Deps " + " (target int, source int, mode not null, primary key (target,source))") + #_db.execute("create unique index Files_name on Files (name)") + #_db.execute("create unique index Deps_ix on Deps (target, source)") + _db.execute("create index Deps_src on Deps (source)") + _db.execute("insert into Schema (version) values (?)", [SCHEMA_VER]) + _db.execute("insert into Runid default values") + _db.execute("insert into Runid default values") + _db.commit() + + if not vars.RUNID: + _db.execute("insert into Runid default values") + _db.commit() + vars.RUNID = _db.execute("select last_insert_rowid()").fetchone()[0] + os.environ['REDO_RUNID'] = str(vars.RUNID) + + _db.execute("pragma journal_mode = PERSIST") + _db.execute("pragma synchronous = off") + return _db + def init(): # FIXME: just wiping out all the locks is kind of cheating. But we # only do this from the toplevel redo process, so unless the user # deliberately starts more than one redo on the same repository, it's # sort of ok. - mkdirp('%s/.redo' % vars.BASE) + db() for f in glob.glob('%s/.redo/lock*' % vars.BASE): os.unlink(f) - for f in glob.glob('%s/.redo/mark^*' % vars.BASE): - os.unlink(f) - for f in glob.glob('%s/.redo/built^*' % vars.BASE): - os.unlink(f) _insane = None @@ -46,7 +97,7 @@ def relpath(t, base): return '/'.join(tparts) -def _sname(typ, t): +def xx_sname(typ, t): # FIXME: t.replace(...) is non-reversible and non-unique here! tnew = relpath(t, vars.BASE) v = vars.BASE + ('/.redo/%s^%s' % (typ, tnew.replace('/', '^'))) @@ -55,128 +106,120 @@ def _sname(typ, t): return v -def add_dep(t, mode, dep): - sn = _sname('dep', t) - reldep = relpath(dep, vars.BASE) - debug2('add-dep: %r < %s %r\n' % (sn, mode, reldep)) +class File(object): + __slots__ = ['id', 'name', 'is_generated', + 'checked_runid', 'changed_runid', + 'stamp', 'csum'] - open(sn, 'a').write('%s %s\n' % (mode, reldep)) - - -def deps(t): - for line in open(_sname('dep', t)).readlines(): - assert(line[0] in ('c','m')) - assert(line[1] == ' ') - assert(line[-1] == '\n') - mode = line[0] - name = line[2:-1] - yield mode,name - - -def _stampname(t): - return _sname('stamp', t) - - -def stamp(t): - mark(t) - stampfile = _stampname(t) - newstampfile = _sname('stamp' + str(os.getpid()), t) - depfile = _sname('dep', t) - if not os.path.exists(vars.BASE + '/.redo'): - # .redo might not exist in a 'make clean' target - return - open(newstampfile, 'w').close() - try: - mtime = os.stat(t).st_mtime - except OSError: - mtime = 0 - os.utime(newstampfile, (mtime, mtime)) - os.rename(newstampfile, stampfile) - open(depfile, 'a').close() - - -def unstamp(t): - unlink(_stampname(t)) - unlink(_sname('dep', t)) - - -def unmark_as_generated(t): - unstamp(t) - unlink(_sname('gen', t)) - - -def stamped(t): - try: - stamptime = os.stat(_stampname(t)).st_mtime - except OSError, e: - if e.errno == errno.ENOENT: - return None + def __init__(self, id=None, name=None): + q = ('select rowid, name, is_generated, checked_runid, changed_runid, ' + ' stamp, csum ' + ' from Files ') + if id != None: + q += 'where rowid=?' + l = [id] + elif name != None: + name = relpath(name, vars.BASE) + q += 'where name=?' + l = [name] else: - raise - return stamptime + raise Exception('name or id must be set') + d = db() + row = d.execute(q, l).fetchone() + if not row: + if not name: + raise Exception('File with id=%r not found and ' + 'name not given' % id) + d.execute('insert into Files (name) values (?)', [name]) + d.commit() + row = d.execute(q, l).fetchone() + assert(row) + (self.id, self.name, self.is_generated, + self.checked_runid, self.changed_runid, + self.stamp, self.csum) = row + def save(self): + if not os.path.exists('%s/.redo' % vars.BASE): + # this might happen if 'make clean' removes the .redo dir + return + d = db() + d.execute('update Files set ' + ' is_generated=?, checked_runid=?, changed_runid=?, ' + ' stamp=?, csum=? ' + ' where rowid=?', + [self.is_generated, self.checked_runid, self.changed_runid, + self.stamp, self.csum, + self.id]) + d.commit() -def built(t): - try: - open(_sname('built', t), 'w').close() - except IOError, e: - if e.errno == errno.ENOENT: - pass # may happen if someone deletes our .redo dir - else: - raise - - -_builts = {} -def isbuilt(t): - if _builts.get(t): - return True - if os.path.exists(_sname('built', t)): - _builts[t] = True - return True - - -# stamps the given input file, but only considers it to have been "built" if its -# mtime has changed. This is useful for static (non-generated) files. -def stamp_and_maybe_built(t): - if stamped(t) != os.stat(t).st_mtime: - built(t) - stamp(t) - + def set_checked(self): + self.checked_runid = vars.RUNID -def mark(t): - try: - open(_sname('mark', t), 'w').close() - except IOError, e: - if e.errno == errno.ENOENT: - pass # may happen if someone deletes our .redo dir + def set_changed(self): + debug2('BUILT: %r (%r)\n' % (self.name, self.stamp)) + self.changed_runid = vars.RUNID + + def set_static(self): + self.update_stamp() + + def update_stamp(self): + newstamp = self.read_stamp() + if newstamp != self.stamp: + debug2("STAMP: %s: %r -> %r\n" % (self.name, self.stamp, newstamp)) + self.stamp = newstamp + self.set_changed() + + def is_changed(self): + return self.changed_runid and self.changed_runid >= vars.RUNID + + def is_checked(self): + return (self.checked_runid and self.checked_runid >= vars.RUNID + and not (self.changed_runid + and self.changed_runid >= self.checked_runid)) + + def deps(self): + q = "select mode, source from Deps where target=?" + for mode,source_id in db().execute(q, [self.id]): + assert(mode in ('c', 'm')) + name = File(id=source_id).name + yield mode,name + + def zap_deps(self): + debug2('zap-deps: %r\n' % self.name) + db().execute('delete from Deps where target=?', [self.id]) + db().commit() + + def add_dep(self, mode, dep): + src = File(name=dep) + reldep = relpath(dep, vars.BASE) + debug2('add-dep: %r < %s %r\n' % (self.name, mode, reldep)) + assert(src.name == reldep) + d = db() + d.execute("delete from Deps where target=? and source=?", + [self.id, src.id]) + d.execute("insert into Deps " + " (target, mode, source) values (?,?,?)", + [self.id, mode, src.id]) + d.commit() + + def read_stamp(self): + try: + st = os.stat(os.path.join(vars.BASE, self.name)) + except OSError: + return '0' # does not exist + if stat.S_ISDIR(st.st_mode): + return 'dir' # the timestamp of a directory is meaningless else: - raise - - -_marks = {} -def ismarked(t): - if _marks.get(t): - return True - if os.path.exists(_sname('mark', t)): - _marks[t] = True - return True - - -def is_generated(t): - return os.path.exists(_sname('gen', t)) - - -def start(t): - unstamp(t) - open(_sname('dep', t), 'w').close() - open(_sname('gen', t), 'w').close() # it's definitely a generated file + # a "unique identifier" stamp for a regular file + return str((st.st_ctime, st.st_mtime, st.st_size, st.st_ino)) + class Lock: def __init__(self, t): self.owned = False self.rfd = self.wfd = None - self.lockname = _sname('lock', t) + self.lockname = xx_sname('lock', t) def __del__(self): if self.owned: diff --git a/t/flush-cache.sh b/t/flush-cache.sh index 4f8f8f9..318838e 100755 --- a/t/flush-cache.sh +++ b/t/flush-cache.sh @@ -1,4 +1,7 @@ #!/bin/sh #echo "Flushing redo cache..." >&2 -find "$REDO_BASE/.redo" -name 'built^*' -o -name 'mark^*' | - xargs rm -f >&2 +( + echo "update Files set checked_runid=null;" + echo "update Files set changed_runid=changed_runid-1;" + #echo "update Files set stamp='dirty' where id in (select distinct target from Deps);" +) | sqlite3 "$REDO_BASE/.redo/db.sqlite3" diff --git a/vars.py b/vars.py index 54421a0..0547030 100644 --- a/vars.py +++ b/vars.py @@ -18,6 +18,7 @@ XTRACE = os.environ.get('REDO_XTRACE', '') and 1 or 0 KEEP_GOING = os.environ.get('REDO_KEEP_GOING', '') and 1 or 0 SHUFFLE = os.environ.get('REDO_SHUFFLE', '') and 1 or 0 STARTDIR = os.environ['REDO_STARTDIR'] +RUNID = atoi.atoi(os.environ.get('REDO_RUNID')) or None BASE = os.environ['REDO_BASE'] while BASE and BASE.endswith('/'): BASE = BASE[:-1]