feat: open source vibecoding example project

This commit is contained in:
nobody 2025-09-28 11:20:25 -07:00
commit b370f462f2
Signed by: GrocerPublishAgent
GPG key ID: 43B1C298CDDE181C
34 changed files with 7744 additions and 0 deletions

640
src/archive.rs Normal file
View file

@ -0,0 +1,640 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use chrono::Utc;
use serde_json::Value;
use std::fs::{File, OpenOptions};
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use uuid::Uuid;
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
use crate::diff;
use crate::events::{Event, Header, Observation};
use crate::reader::{ArchiveReader, ReadMode};
/// Streams archive lines (header, observation events, snapshots, comments)
/// to a file, one JSON value or comment per line.
pub struct ArchiveWriter {
    // Buffered output; flushed explicitly in `finish`.
    writer: BufWriter<File>,
    // Number of observations written so far; drives snapshot cadence.
    observation_count: usize,
    // Emit a snapshot every N observations; None disables interval snapshots.
    snapshot_interval: Option<usize>,
    // Display form of the output path, used for diagnostic locations.
    filename: String,
}
impl ArchiveWriter {
    /// Creates a writer over a brand-new archive file at `path`, truncating
    /// any existing file. `snapshot_interval` controls `should_write_snapshot`
    /// (None disables interval-based snapshots).
    pub fn new<P: AsRef<Path>>(
        path: P,
        snapshot_interval: Option<usize>,
    ) -> Result<Self, Vec<Diagnostic>> {
        let filename = path.as_ref().display().to_string();
        let file = match File::create(&path) {
            Ok(f) => f,
            Err(e) => {
                let diagnostic = Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::PathNotFound,
                    format!("I couldn't create the output file: {}", e)
                )
                .with_advice(
                    "Make sure you have write permission in this directory and that the path is valid."
                        .to_string()
                );
                return Err(vec![diagnostic]);
            }
        };
        let writer = BufWriter::new(file);
        Ok(Self {
            writer,
            observation_count: 0,
            snapshot_interval,
            filename,
        })
    }

    /// Opens an existing archive for appending. `current_observation_count`
    /// seeds the counter so snapshot cadence continues from where the archive
    /// left off instead of restarting at zero.
    pub fn new_append<P: AsRef<Path>>(
        path: P,
        snapshot_interval: Option<usize>,
        current_observation_count: usize,
    ) -> Result<Self, Vec<Diagnostic>> {
        let filename = path.as_ref().display().to_string();
        let file = match OpenOptions::new().append(true).open(&path) {
            Ok(f) => f,
            Err(e) => {
                let diagnostic = Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::PathNotFound,
                    format!("I couldn't open the archive file for appending: {}", e)
                )
                .with_advice(
                    "Make sure the archive file exists and you have write permission."
                        .to_string()
                );
                return Err(vec![diagnostic]);
            }
        };
        let writer = BufWriter::new(file);
        Ok(Self {
            writer,
            observation_count: current_observation_count,
            snapshot_interval,
            filename,
        })
    }

    /// Serializes the header as the first JSON line of the archive.
    pub fn write_header(&mut self, header: &Header) -> Result<(), Vec<Diagnostic>> {
        let header_json = match serde_json::to_string(header) {
            Ok(json) => json,
            Err(e) => {
                return Err(vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::InvalidEventJson,
                    format!("I couldn't serialize the header to JSON: {}", e),
                )
                // The header always lives on line 1 of the archive.
                .with_location(self.filename.clone(), 1)]);
            }
        };
        if let Err(e) = writeln!(self.writer, "{}", header_json) {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't write to the output file: {}", e),
            )
            .with_location(self.filename.clone(), 1)]);
        }
        Ok(())
    }

    /// Writes a `# ...` comment line; archive readers skip these.
    pub fn write_comment(&mut self, comment: &str) -> Result<(), Vec<Diagnostic>> {
        if let Err(e) = writeln!(self.writer, "# {}", comment) {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't write to the output file: {}", e),
            )]);
        }
        Ok(())
    }

    /// Expands an observation into its event lines and writes each one,
    /// then bumps the observation counter used for snapshot cadence.
    pub fn write_observation(&mut self, observation: Observation) -> Result<(), Vec<Diagnostic>> {
        let events = observation.to_events();
        for event in events {
            let event_json = match serde_json::to_string(&event) {
                Ok(json) => json,
                Err(e) => {
                    return Err(vec![Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::InvalidEventJson,
                        format!("I couldn't serialize an event to JSON: {}", e),
                    )]);
                }
            };
            if let Err(e) = writeln!(self.writer, "{}", event_json) {
                return Err(vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::PathNotFound,
                    format!("I couldn't write to the output file: {}", e),
                )]);
            }
        }
        self.observation_count += 1;
        Ok(())
    }

    /// Writes a full-state snapshot event with a fresh UUID-based ID.
    /// Note: snapshots do not increment `observation_count` — they are not
    /// observations.
    pub fn write_snapshot(&mut self, object: &Value) -> Result<(), Vec<Diagnostic>> {
        let snapshot_id = format!("snapshot-{}", Uuid::new_v4());
        let snapshot = Event::Snapshot {
            observation_id: snapshot_id,
            timestamp: Utc::now(),
            object: object.clone(),
        };
        let event_json = match serde_json::to_string(&snapshot) {
            Ok(json) => json,
            Err(e) => {
                return Err(vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::InvalidEventJson,
                    format!("I couldn't serialize the snapshot to JSON: {}", e),
                )]);
            }
        };
        if let Err(e) = writeln!(self.writer, "{}", event_json) {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't write to the output file: {}", e),
            )]);
        }
        Ok(())
    }

    /// True when an interval is configured and we have just completed a
    /// multiple of it; never true before the first observation.
    pub fn should_write_snapshot(&self) -> bool {
        if let Some(interval) = self.snapshot_interval {
            self.observation_count > 0 && self.observation_count % interval == 0
        } else {
            false
        }
    }

    /// Flushes buffered output. Call this rather than relying on Drop, which
    /// would silently swallow flush errors.
    pub fn finish(mut self) -> Result<(), Vec<Diagnostic>> {
        if let Err(e) = self.writer.flush() {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't flush the output file: {}", e),
            )]);
        }
        Ok(())
    }
}
/// Accumulates successive JSON states and turns each new state into a diff
/// observation against the previous one.
pub struct ArchiveBuilder {
    // First state ever added; becomes the archive header's `initial` field.
    initial_state: Option<Value>,
    // Most recently added state; the baseline for the next diff.
    current_state: Value,
    // Optional provenance string recorded in the header.
    source: Option<String>,
    // Snapshot cadence forwarded to the writer.
    snapshot_interval: Option<usize>,
}
impl ArchiveBuilder {
    /// Creates an empty builder with no initial state, source, or snapshot
    /// interval configured.
    pub fn new() -> Self {
        Self {
            initial_state: None,
            current_state: Value::Null,
            source: None,
            snapshot_interval: None,
        }
    }

    /// Records where the archived data came from (stored in the header).
    pub fn with_source(mut self, source: String) -> Self {
        self.source = Some(source);
        self
    }

    /// Requests a full snapshot every `interval` observations.
    pub fn with_snapshot_interval(mut self, interval: usize) -> Self {
        self.snapshot_interval = Some(interval);
        self
    }

    /// Feeds the next observed JSON state into the builder.
    ///
    /// The first state becomes the archive's initial state and yields `None`;
    /// every later state is diffed against the previous one and yields an
    /// `Observation` containing the change events.
    pub fn add_state(&mut self, state: Value) -> Option<Observation> {
        if self.initial_state.is_none() {
            self.initial_state = Some(state.clone());
            self.current_state = state;
            return None;
        }
        let observation_id = format!("obs-{}", Uuid::new_v4());
        let timestamp = Utc::now();
        let diff_result: Vec<Event> = diff::diff(&self.current_state, &state, "", &observation_id);
        self.current_state = state;
        let mut observation = Observation::new(observation_id, timestamp);
        for event in diff_result {
            observation.add_event(event);
        }
        Some(observation)
    }

    /// Writes a minimal archive (header only) to `output_path`.
    ///
    /// # Errors
    /// Fails with a diagnostic if no state was ever added, since the header
    /// requires an initial state.
    pub fn build<P: AsRef<Path>>(self, output_path: P) -> Result<(), Vec<Diagnostic>> {
        // Take the initial state by value instead of is_none() + unwrap().
        let initial = match self.initial_state {
            Some(state) => state,
            None => {
                return Err(vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::MissingHeaderField,
                    "I can't build an archive without any initial state.".to_string(),
                )]);
            }
        };
        let header = Header::new(initial, self.source);
        let mut writer = ArchiveWriter::new(output_path, self.snapshot_interval)?;
        writer.write_header(&header)?;
        writer.finish()?;
        Ok(())
    }

    /// Returns the initial state, if one has been recorded.
    pub fn get_initial_state(&self) -> Option<&Value> {
        self.initial_state.as_ref()
    }
}

/// `Default` mirrors `new()` so the builder composes with generic code
/// expecting `Default` (clippy: `new_without_default`).
impl Default for ArchiveBuilder {
    fn default() -> Self {
        Self::new()
    }
}
/// Derives the default archive filename for a given input path.
///
/// Rules:
/// - `*.json.archive` inputs are returned untouched.
/// - `*.json` becomes `*.json.archive`.
/// - Any other extension gets `.json.archive` appended after it.
/// - Extensionless paths simply gain `.json.archive`.
pub fn default_output_filename<P: AsRef<Path>>(input_path: P) -> PathBuf {
    let path = input_path.as_ref();

    // Short-circuit when the input already looks like an archive.
    let already_archive = path
        .file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| name.ends_with(".json.archive"));
    if already_archive {
        return path.to_path_buf();
    }

    let mut result = path.to_path_buf();
    match path.extension() {
        // `.json` collapses into `.json.archive` rather than doubling up.
        Some(ext) if ext == "json" => {
            result.set_extension("json.archive");
        }
        // Preserve any other extension and stack `.json.archive` behind it.
        Some(ext) => {
            result.set_extension(format!("{}.json.archive", ext.to_string_lossy()));
        }
        // No extension at all: just attach the archive suffix.
        None => {
            result.set_extension("json.archive");
        }
    }
    result
}
/// Builds a new archive at `output_path` from an ordered list of JSON files.
///
/// The first file becomes the archive's initial state; each subsequent file
/// is diffed against the previous state and written as an observation, with
/// periodic snapshots when `snapshot_interval` is set.
///
/// # Errors
/// Returns diagnostics when no input files are given, a file can't be read
/// or parsed, or the archive can't be written.
pub fn create_archive_from_files<P: AsRef<Path>>(
    input_files: &[P],
    output_path: P,
    source: Option<String>,
    snapshot_interval: Option<usize>,
) -> Result<(), Vec<Diagnostic>> {
    // Guard the `input_files[0]` access below: an empty slice would panic.
    if input_files.is_empty() {
        return Err(vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::MissingHeaderField,
            "I need at least one input file to create an archive.".to_string(),
        )
        .with_advice("Pass one or more JSON files to archive.".to_string())]);
    }
    let mut builder = ArchiveBuilder::new();
    if let Some(source) = source {
        builder = builder.with_source(source);
    }
    if let Some(interval) = snapshot_interval {
        builder = builder.with_snapshot_interval(interval);
    }
    let first_content = std::fs::read_to_string(&input_files[0]).map_err(|e| {
        vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::PathNotFound,
            format!("I couldn't read the first input file: {}", e),
        )]
    })?;
    let first_state: Value = serde_json::from_str(&first_content).map_err(|e| {
        vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::InvalidEventJson,
            format!("I couldn't parse the first input file as JSON: {}", e),
        )
        .with_advice("Make sure the file contains valid JSON.".to_string())]
    })?;
    // Seed the builder; the first state never yields an observation.
    let _ = builder.add_state(first_state.clone());
    let header = Header::new(first_state, builder.source.clone());
    let mut writer = ArchiveWriter::new(&output_path, builder.snapshot_interval)?;
    writer.write_header(&header)?;
    // Every remaining file becomes one observation (plus optional snapshot).
    for file_path in input_files[1..].iter() {
        writer.write_comment(&format!("Processing file: {:?}", file_path.as_ref()))?;
        let content = std::fs::read_to_string(file_path).map_err(|e| {
            vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't read the input file: {}", e),
            )]
        })?;
        let state: Value = serde_json::from_str(&content).map_err(|e| {
            vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::InvalidEventJson,
                format!("I couldn't parse the input file as JSON: {}", e),
            )
            .with_advice("Make sure the file contains valid JSON.".to_string())]
        })?;
        if let Some(observation) = builder.add_state(state.clone()) {
            writer.write_observation(observation)?;
            if writer.should_write_snapshot() {
                writer.write_snapshot(&state)?;
            }
        }
    }
    writer.finish()?;
    Ok(())
}
/// Appends observations derived from `new_files` to an existing archive.
///
/// Unlike the other entry points this returns `Vec<Diagnostic>` directly:
/// an empty vector means success, a non-empty one reports the failure.
/// When `output_path` differs from `archive_path` the archive is copied
/// first and the copy is appended to, leaving the original untouched.
pub fn append_to_archive<P: AsRef<Path>, Q: AsRef<Path>>(
    archive_path: P,
    new_files: &[Q],
    output_path: P,
    source: Option<String>,
    snapshot_interval: Option<usize>,
) -> Vec<Diagnostic> {
    // Read the existing archive to get the final state
    let reader = match ArchiveReader::new(&archive_path, ReadMode::AppendSeek) {
        Ok(r) => r,
        Err(e) => {
            return vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't open the archive for reading: {}", e),
            )];
        }
    };
    let read_result = match reader.read(&archive_path) {
        Ok(result) => result,
        Err(e) => {
            return vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't read the archive: {}", e),
            )];
        }
    };
    // Check for fatal diagnostics in the archive
    if read_result.diagnostics.has_fatal() {
        let mut diagnostics = vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::InvalidEventJson,
            "The existing archive contains fatal errors. Cannot append to a corrupt archive.".to_string(),
        )];
        // Surface the underlying archive errors after the summary diagnostic.
        diagnostics.extend(read_result.diagnostics.into_diagnostics());
        return diagnostics;
    }
    // If output path is different from archive path, copy the archive first
    if archive_path.as_ref() != output_path.as_ref() {
        if let Err(e) = std::fs::copy(&archive_path, &output_path) {
            return vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't copy the archive to the output location: {}", e),
            )];
        }
    }
    // Create an append writer seeded with the existing observation count so
    // snapshot cadence continues across append sessions.
    let mut writer = match ArchiveWriter::new_append(&output_path, snapshot_interval, read_result.observation_count) {
        Ok(w) => w,
        Err(diagnostics) => return diagnostics,
    };
    // Create a builder to track state changes
    let mut builder = ArchiveBuilder::new();
    if let Some(source) = source {
        builder = builder.with_source(source);
    }
    if let Some(interval) = snapshot_interval {
        builder = builder.with_snapshot_interval(interval);
    }
    // Initialize builder with the final state from the archive. Fields are
    // set directly (same module) so add_state diffs against the archive's
    // final state instead of treating the first new file as initial.
    let current_state = read_result.final_state;
    builder.current_state = current_state.clone();
    builder.initial_state = Some(current_state.clone());
    // Process each new file
    for file_path in new_files.iter() {
        if let Err(diagnostics) = writer.write_comment(&format!("Processing file: {:?}", file_path.as_ref())) {
            return diagnostics;
        }
        let content = match std::fs::read_to_string(file_path) {
            Ok(content) => content,
            Err(e) => {
                return vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::PathNotFound,
                    format!("I couldn't read the input file: {}", e),
                )];
            }
        };
        let state: Value = match serde_json::from_str(&content) {
            Ok(state) => state,
            Err(e) => {
                return vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::InvalidEventJson,
                    format!("I couldn't parse the input file as JSON: {}", e),
                )
                .with_advice("Make sure the file contains valid JSON.".to_string())];
            }
        };
        if let Some(observation) = builder.add_state(state.clone()) {
            if let Err(diagnostics) = writer.write_observation(observation) {
                return diagnostics;
            }
            if writer.should_write_snapshot() {
                if let Err(diagnostics) = writer.write_snapshot(&state) {
                    return diagnostics;
                }
            }
        }
    }
    // Finish writing
    match writer.finish() {
        Ok(()) => Vec::new(),
        Err(diagnostics) => diagnostics,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Header written through ArchiveWriter should round-trip as a single
    // parseable JSON line with the expected magic type/version.
    #[test]
    fn test_archive_writer_header() -> Result<(), Box<dyn std::error::Error>> {
        let temp_file = NamedTempFile::new()?;
        let header = Header::new(json!({"test": "value"}), Some("test-source".to_string()));
        {
            let mut writer = ArchiveWriter::new(temp_file.path(), None)
                .map_err(|_| "Failed to create writer")?;
            writer
                .write_header(&header)
                .map_err(|_| "Failed to write header")?;
            writer.finish().map_err(|_| "Failed to finish")?;
        }
        let content = std::fs::read_to_string(temp_file.path())?;
        let lines: Vec<&str> = content.lines().collect();
        assert_eq!(lines.len(), 1);
        let parsed_header: Header = serde_json::from_str(lines[0])?;
        assert_eq!(parsed_header.file_type, "@peoplesgrocers/json-archive");
        assert_eq!(parsed_header.version, 1);
        assert_eq!(parsed_header.initial, json!({"test": "value"}));
        Ok(())
    }

    // The first state seeds the builder (no observation); the second state
    // must produce a non-empty observation from the diff.
    #[test]
    fn test_archive_builder() -> Result<(), Box<dyn std::error::Error>> {
        let mut builder = ArchiveBuilder::new();
        // First state becomes initial
        let result = builder.add_state(json!({"count": 0}));
        assert!(result.is_none());
        // Second state generates observation
        let observation = builder
            .add_state(json!({"count": 1}))
            .expect("Should generate observation");
        assert!(!observation.events.is_empty());
        Ok(())
    }

    // End-to-end: two input files produce a header line plus event lines.
    #[test]
    fn test_create_archive_from_files() -> Result<(), Box<dyn std::error::Error>> {
        // Create temporary input files
        let mut file1 = NamedTempFile::new()?;
        let mut file2 = NamedTempFile::new()?;
        let output_file = NamedTempFile::new()?;
        writeln!(file1, r#"{{"count": 0, "name": "test"}}"#)?;
        writeln!(file2, r#"{{"count": 1, "name": "test"}}"#)?;
        let input_files = vec![file1.path(), file2.path()];
        create_archive_from_files(
            &input_files,
            output_file.path(),
            Some("test-source".to_string()),
            None,
        )
        .map_err(|_| "Failed to create archive")?;
        let content = std::fs::read_to_string(output_file.path())?;
        let lines: Vec<&str> = content.lines().collect();
        assert!(lines.len() >= 2); // At least header + comment + observe + change events
        // First line should be header
        let header: Header = serde_json::from_str(lines[0])?;
        assert_eq!(header.file_type, "@peoplesgrocers/json-archive");
        assert_eq!(header.version, 1);
        assert_eq!(header.initial, json!({"count": 0, "name": "test"}));
        Ok(())
    }

    // should_write_snapshot flips to true only once the observation count
    // reaches a multiple of the configured interval.
    #[test]
    fn test_snapshot_interval() -> Result<(), Box<dyn std::error::Error>> {
        let temp_file = NamedTempFile::new()?;
        let mut writer =
            ArchiveWriter::new(temp_file.path(), Some(2)).map_err(|_| "Failed to create writer")?;
        assert!(!writer.should_write_snapshot()); // No observations yet
        let obs1 = Observation::new("obs-1".to_string(), Utc::now());
        writer
            .write_observation(obs1)
            .map_err(|_| "Failed to write observation")?;
        assert!(!writer.should_write_snapshot()); // 1 observation, interval is 2
        let obs2 = Observation::new("obs-2".to_string(), Utc::now());
        writer
            .write_observation(obs2)
            .map_err(|_| "Failed to write observation")?;
        assert!(writer.should_write_snapshot()); // 2 observations, should snapshot
        Ok(())
    }

    // Covers all four filename-derivation rules, including idempotence on
    // an already-archived name.
    #[test]
    fn test_default_output_filename() {
        assert_eq!(
            default_output_filename("test.json"),
            PathBuf::from("test.json.archive")
        );
        assert_eq!(
            default_output_filename("test.txt"),
            PathBuf::from("test.txt.json.archive")
        );
        assert_eq!(
            default_output_filename("test"),
            PathBuf::from("test.json.archive")
        );
        assert_eq!(
            default_output_filename("test.json.archive"),
            PathBuf::from("test.json.archive")
        );
    }
}

410
src/cmd/info.rs Normal file
View file

@ -0,0 +1,410 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use crate::flags;
use chrono::{DateTime, Utc};
use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel};
use serde::Serialize;
use serde_json::Value;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Per-observation summary row collected while scanning an archive.
#[derive(Debug)]
struct ObservationInfo {
    // Observation ID from the "observe" event ("initial" for the synthetic
    // first entry representing the header's initial state).
    id: String,
    // When this observation was recorded.
    timestamp: DateTime<Utc>,
    created: DateTime<Utc>, // For initial state, this is the archive creation time
    // Number of change events declared by the "observe" event.
    change_count: usize,
    // Serialized byte length of the JSON document after this observation.
    json_size: usize,
}
/// One observation row in the machine-readable (`--output json`) report.
#[derive(Serialize)]
struct JsonObservation {
    // Zero-based position in the archive (0 = initial state).
    index: usize,
    id: String,
    // RFC 3339 rendering of the observation timestamp.
    timestamp: String,
    changes: usize,
    json_size: usize,
}
/// Top-level payload for the machine-readable (`--output json`) report.
#[derive(Serialize)]
struct JsonInfoOutput {
    // Display path of the archive file.
    archive: String,
    // RFC 3339 creation time (empty string when no observations exist).
    created: String,
    // Size of the archive file on disk, in bytes.
    file_size: u64,
    snapshot_count: usize,
    observations: Vec<JsonObservation>,
}
/// Entry point for `json-archive info`: prints a summary of the archive
/// either as pretty JSON (`--output json`) or as a human-readable table.
/// Returns an empty vector on success, diagnostics on failure.
pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
    if !flags.file.exists() {
        return vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::PathNotFound,
            format!("I couldn't find the archive file: {}", flags.file.display()),
        )
        .with_advice(
            "Make sure the file path is correct and the file exists. \
             Check for typos in the filename."
                .to_string(),
        )];
    }
    let observations = match collect_observations(&flags.file) {
        Ok(obs) => obs,
        Err(diagnostics) => return diagnostics,
    };
    // File size and snapshot count are best-effort: failures degrade to 0
    // rather than aborting the report.
    let file_size = match std::fs::metadata(&flags.file) {
        Ok(metadata) => metadata.len(),
        Err(_) => 0,
    };
    let snapshot_count = count_snapshots(&flags.file).unwrap_or(0);
    // Check output format
    let is_json_output = flags.output.as_ref().map(|s| s == "json").unwrap_or(false);
    if is_json_output {
        // JSON output mode
        if observations.is_empty() {
            let empty_output = JsonInfoOutput {
                archive: flags.file.display().to_string(),
                created: "".to_string(),
                file_size,
                snapshot_count,
                observations: Vec::new(),
            };
            println!(
                "{}",
                serde_json::to_string_pretty(&empty_output).unwrap_or_default()
            );
            return Vec::new();
        }
        let json_observations: Vec<JsonObservation> = observations
            .iter()
            .enumerate()
            .map(|(index, obs)| JsonObservation {
                index,
                // Index 0 is the synthetic initial-state entry.
                id: if index == 0 {
                    "initial".to_string()
                } else {
                    obs.id.clone()
                },
                timestamp: obs.timestamp.to_rfc3339(),
                changes: obs.change_count,
                json_size: obs.json_size,
            })
            .collect();
        let json_output = JsonInfoOutput {
            archive: flags.file.display().to_string(),
            created: observations[0].created.to_rfc3339(),
            file_size,
            snapshot_count,
            observations: json_observations,
        };
        println!(
            "{}",
            serde_json::to_string_pretty(&json_output).unwrap_or_default()
        );
    } else {
        // Human-readable output mode
        println!("Archive: {}", flags.file.display());
        if observations.is_empty() {
            println!("No observations found");
            return Vec::new();
        }
        let first_timestamp = &observations[0].created;
        let last_timestamp = if observations.len() > 1 {
            &observations.last().unwrap().timestamp
        } else {
            first_timestamp
        };
        println!("Created: {}", format_timestamp(first_timestamp));
        println!();
        if observations.len() == 1 {
            println!("1 observation on {}", format_timestamp(first_timestamp));
        } else {
            println!(
                "{} observations from {} to {}",
                observations.len(),
                format_timestamp(first_timestamp),
                format_timestamp(last_timestamp)
            );
        }
        println!();
        // Table header
        println!(" # Observation ID Date & Time Changes JSON Size");
        println!("────────────────────────────────────────────────────────────────────────────────────────");
        for (index, obs) in observations.iter().enumerate() {
            // Row 0 is the initial state: no ID, no change count.
            let id_display = if index == 0 {
                "(initial)".to_string()
            } else {
                truncate_id(&obs.id)
            };
            let changes_display = if index == 0 {
                "-".to_string()
            } else {
                obs.change_count.to_string()
            };
            println!(
                " {:2} {:32} {:25} {:7} {:9}",
                index,
                id_display,
                format_timestamp(&obs.timestamp),
                changes_display,
                format_size(obs.json_size as u64)
            );
        }
        println!();
        let snapshot_text = if snapshot_count == 0 {
            "0 snapshots".to_string()
        } else {
            format!("{} snapshots", snapshot_count)
        };
        println!(
            "Total archive size: {} ({})",
            format_size(file_size),
            snapshot_text
        );
        // Add usage instructions
        println!();
        println!("To get the JSON value at a specific observation:");
        println!(" json-archive state --index <#> {}", flags.file.display());
        println!(
            " json-archive state --id <observation-id> {}",
            flags.file.display()
        );
        println!();
        println!("Examples:");
        println!(
            " json-archive state --index 0 {} # Get initial state",
            flags.file.display()
        );
        println!(
            " json-archive state --index 2 {} # Get state after observation 2",
            flags.file.display()
        );
    }
    Vec::new()
}
/// Scans the archive file line-by-line and builds one summary entry per
/// observation, plus a synthetic entry (index 0) for the initial state.
///
/// The JSON document is replayed event-by-event so each observation's
/// `json_size` reflects the serialized size of the document after that
/// observation's changes were applied.
///
/// # Errors
/// Fails when the file can't be opened, is empty, or has an unparseable
/// header. Malformed event lines are skipped, not fatal.
fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
    let file = match File::open(file_path) {
        Ok(f) => f,
        Err(e) => {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't open the archive file: {}", e),
            )]);
        }
    };
    let reader = BufReader::new(file);
    let mut lines = reader.lines();
    let mut observations = Vec::new();
    // The first line must be the archive header object.
    let header_line = match lines.next() {
        Some(Ok(line)) => line,
        _ => {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::EmptyFile,
                "Archive file is empty or unreadable".to_string(),
            )]);
        }
    };
    let header: Value = match serde_json::from_str(&header_line) {
        Ok(h) => h,
        Err(e) => {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::MissingHeader,
                format!("I couldn't parse the header: {}", e),
            )]);
        }
    };
    // Fall back to "now" when the header's creation timestamp is missing
    // or unparseable — the report still renders, just with a wrong time.
    let created_str = header["created"].as_str().unwrap_or("");
    let created: DateTime<Utc> = match created_str.parse() {
        Ok(dt) => dt,
        Err(_) => Utc::now(),
    };
    let initial_state = header["initial"].clone();
    let initial_size = serde_json::to_string(&initial_state)
        .unwrap_or_default()
        .len();
    // Add initial state as observation 0
    observations.push(ObservationInfo {
        id: "initial".to_string(),
        timestamp: created,
        created,
        change_count: 0,
        json_size: initial_size,
    });
    let mut current_state = initial_state;
    // Parse events; comment lines, blank lines, and invalid JSON are skipped.
    for line in lines {
        let line = match line {
            Ok(l) => l,
            Err(_) => continue,
        };
        if line.trim().starts_with('#') || line.trim().is_empty() {
            continue;
        }
        let event: Value = match serde_json::from_str(&line) {
            Ok(e) => e,
            Err(_) => continue,
        };
        if let Some(arr) = event.as_array() {
            if arr.is_empty() {
                continue;
            }
            let event_type = arr[0].as_str().unwrap_or("");
            if event_type == "observe" && arr.len() >= 4 {
                let obs_id = arr[1].as_str().unwrap_or("").to_string();
                let timestamp_str = arr[2].as_str().unwrap_or("");
                let change_count = arr[3].as_u64().unwrap_or(0) as usize;
                let timestamp: DateTime<Utc> = match timestamp_str.parse() {
                    Ok(dt) => dt,
                    Err(_) => continue,
                };
                // Seed json_size with the current state's size so that an
                // observation with zero change events still reports the real
                // document size instead of 0. Change events that follow will
                // overwrite this with the post-change size.
                let current_size = serde_json::to_string(&current_state)
                    .unwrap_or_default()
                    .len();
                observations.push(ObservationInfo {
                    id: obs_id,
                    timestamp,
                    created,
                    change_count,
                    json_size: current_size,
                });
            } else {
                // Apply the event to current_state for size calculation
                apply_event_to_state(&mut current_state, &arr);
                // Update the JSON size of the last observation
                if let Some(last_obs) = observations.last_mut() {
                    last_obs.json_size = serde_json::to_string(&current_state)
                        .unwrap_or_default()
                        .len();
                }
            }
        }
    }
    Ok(observations)
}
/// Applies a single archive event array to `state` in place.
///
/// Recognized events: `["add", path, value]`, `["change", path, value]`,
/// and `["remove", path]`. Anything else — including malformed arrays and
/// pointer failures — is silently ignored: this is best-effort replay for
/// size accounting, not validation.
fn apply_event_to_state(state: &mut Value, event: &[Value]) {
    if event.is_empty() {
        return;
    }
    let event_type = event[0].as_str().unwrap_or("");
    match event_type {
        // "add" and "change" have identical replay semantics (set the value
        // at the JSON pointer path), so the two duplicated arms are merged.
        "add" | "change" if event.len() >= 3 => {
            let path = event[1].as_str().unwrap_or("");
            let value = event[2].clone();
            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
                let _ = pointer.set(state, value);
            }
        }
        "remove" if event.len() >= 2 => {
            let path = event[1].as_str().unwrap_or("");
            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
                let _ = pointer.remove(state);
            }
        }
        _ => {}
    }
}
/// Renders a UTC timestamp like "Mon 13:45:09 02-Jun-2025" for table rows.
fn format_timestamp(dt: &DateTime<Utc>) -> String {
    format!("{}", dt.format("%a %H:%M:%S %d-%b-%Y"))
}
/// Shortens an observation ID to at most 20 bytes plus "..." for table
/// display; short IDs are returned unchanged.
fn truncate_id(id: &str) -> String {
    if id.len() > 20 {
        // `&id[..20]` panics when byte 20 falls inside a multi-byte UTF-8
        // character. Walk back to the nearest char boundary before slicing.
        let mut cut = 20;
        while !id.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &id[..cut])
    } else {
        id.to_string()
    }
}
/// Formats a byte count for display: raw bytes below 1 KiB, then one
/// decimal place of KB or MB.
fn format_size(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = 1024 * 1024;
    match bytes {
        b if b < KB => format!("{} bytes", b),
        b if b < MB => format!("{:.1} KB", b as f64 / KB as f64),
        b => format!("{:.1} MB", b as f64 / MB as f64),
    }
}
/// Counts snapshot events by scanning lines that look like event arrays
/// (start with '[') and contain the literal "snapshot" string.
/// Heuristic, not a full parse — matches the info report's best-effort style.
fn count_snapshots(file_path: &Path) -> Result<usize, std::io::Error> {
    let file = File::open(file_path)?;
    BufReader::new(file).lines().try_fold(0usize, |total, line| {
        let text = line?;
        let is_snapshot = text.trim().starts_with('[') && text.contains("\"snapshot\"");
        Ok(if is_snapshot { total + 1 } else { total })
    })
}

23
src/cmd/mod.rs Normal file
View file

@ -0,0 +1,23 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
pub mod info;
pub mod state;

506
src/cmd/state.rs Normal file
View file

@ -0,0 +1,506 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use crate::flags;
use chrono::{DateTime, Utc};
use json_archive::reader::{ArchiveReader, ReadMode};
use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel};
use serde_json::Value;
use std::path::Path;
/// How the user selects which observation's state to print. Exactly one
/// method may be specified per invocation (enforced by `parse_access_method`).
#[derive(Debug)]
enum AccessMethod {
    // Look up an observation by its exact ID string.
    Id(String),
    // Look up an observation by zero-based index (0 = initial state).
    Index(usize),
    // Most recent observation at or before the given time (inclusive).
    AsOf(DateTime<Utc>),
    // Most recent observation strictly before the given time.
    RightBefore(DateTime<Utc>),
    // An observation after the given time — selection logic lives in
    // find_and_replay_to_target; presumably the first one after. TODO confirm.
    After(DateTime<Utc>),
    // The final state after all observations (the default).
    Latest,
}
/// Entry point for `json-archive state`: prints the archive's JSON document
/// as it stood at the requested observation (by ID, index, timestamp, or
/// latest). Returns an empty vector on success, diagnostics on failure.
pub fn run(flags: &flags::State) -> Vec<Diagnostic> {
    if !flags.file.exists() {
        return vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::PathNotFound,
            format!("I couldn't find the archive file: {}", flags.file.display()),
        )
        .with_advice(
            "Make sure the file path is correct and the file exists. \
             Check for typos in the filename."
                .to_string(),
        )];
    }
    // Parse and validate flags - ensure only one access method is specified
    let access_method = match parse_access_method(flags) {
        Ok(method) => method,
        Err(diagnostic) => return vec![diagnostic],
    };
    // Read the archive using the existing reader
    let reader = match ArchiveReader::new(&flags.file, ReadMode::FullValidation) {
        Ok(reader) => reader,
        Err(e) => {
            return vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't open the archive file: {}", e),
            )];
        }
    };
    let result = match reader.read(&flags.file) {
        Ok(result) => result,
        Err(e) => {
            return vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't read the archive file: {}", e),
            )];
        }
    };
    // If there are fatal diagnostics, return them
    if result.diagnostics.has_fatal() {
        return result.diagnostics.into_diagnostics();
    }
    // For non-latest access methods, we need to collect observations and find the target
    let target_state = match access_method {
        AccessMethod::Latest => {
            // The reader already gives us the final state after all observations
            result.final_state
        }
        _ => {
            // We need to collect observations and replay to the target
            match find_and_replay_to_target(&flags.file, &access_method) {
                Ok(state) => state,
                Err(diagnostics) => return diagnostics,
            }
        }
    };
    // Output the JSON state
    match serde_json::to_string_pretty(&target_state) {
        Ok(json) => println!("{}", json),
        Err(e) => {
            return vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::InvalidEventJson,
                format!("I couldn't serialize the state to JSON: {}", e),
            )];
        }
    }
    Vec::new()
}
fn parse_access_method(flags: &flags::State) -> Result<AccessMethod, Diagnostic> {
let mut methods = Vec::new();
if let Some(ref id) = flags.id {
methods.push(AccessMethod::Id(id.clone()));
}
if let Some(index) = flags.index {
methods.push(AccessMethod::Index(index));
}
if let Some(ref as_of_str) = flags.as_of {
match as_of_str.parse::<DateTime<Utc>>() {
Ok(dt) => methods.push(AccessMethod::AsOf(dt)),
Err(_) => {
return Err(Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::InvalidTimestamp,
format!("I couldn't parse the timestamp '{}'. Please use ISO-8601 format like '2025-01-15T10:05:00Z'", as_of_str)
));
}
}
}
if let Some(ref right_before_str) = flags.before {
match right_before_str.parse::<DateTime<Utc>>() {
Ok(dt) => methods.push(AccessMethod::RightBefore(dt)),
Err(_) => {
return Err(Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::InvalidTimestamp,
format!("I couldn't parse the timestamp '{}'. Please use ISO-8601 format like '2025-01-15T10:05:00Z'", right_before_str)
));
}
}
}
if let Some(ref after_str) = flags.after {
match after_str.parse::<DateTime<Utc>>() {
Ok(dt) => methods.push(AccessMethod::After(dt)),
Err(_) => {
return Err(Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::InvalidTimestamp,
format!("I couldn't parse the timestamp '{}'. Please use ISO-8601 format like '2025-01-15T10:05:00Z'", after_str)
));
}
}
}
if flags.latest.unwrap_or(false) {
methods.push(AccessMethod::Latest);
}
match methods.len() {
0 => Ok(AccessMethod::Latest), // Default to latest if no flags specified
1 => Ok(methods.into_iter().next().unwrap()),
_ => Err(Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
"Please specify only one access method (--id, --index, --as-of, --right-before, --after, or --latest)".to_string()
).with_advice(
"Examples:\n\
json-archive state --id obs-123 file.archive\n\
json-archive state --index 2 file.archive\n\
json-archive state --as-of \"2025-01-15T10:05:00Z\" file.archive"
.to_string()
))
}
}
/// Loads the archive at `file_path`, reconstructs the state at every
/// observation, and returns the JSON document selected by `access_method`.
fn find_and_replay_to_target(
    file_path: &Path,
    access_method: &AccessMethod,
) -> Result<Value, Vec<Diagnostic>> {
    // Every access method needs the per-observation states, so build the
    // whole timeline up front.
    let observations = collect_observations_with_events(file_path)?;
    if observations.is_empty() {
        return Err(vec![Diagnostic::new(
            DiagnosticLevel::Fatal,
            DiagnosticCode::EmptyFile,
            "No observations found in the archive".to_string(),
        )]);
    }

    // Select the target observation according to the access method.
    let target = match access_method {
        AccessMethod::Id(wanted) => match observations.iter().find(|obs| obs.id == *wanted) {
            Some(obs) => obs,
            None => {
                return Err(vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::NonExistentObservationId,
                    format!("I couldn't find an observation with ID '{}'", wanted),
                )
                .with_advice(
                    "Use 'json-archive info' to see available observation IDs".to_string(),
                )]);
            }
        },
        AccessMethod::Index(index) => match observations.get(*index) {
            Some(obs) => obs,
            None => {
                return Err(vec![Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::ArrayIndexOutOfBounds,
                    format!(
                        "Index {} is out of bounds. The archive has {} observations (0-{})",
                        index,
                        observations.len(),
                        observations.len() - 1
                    ),
                )
                .with_advice(
                    "Use 'json-archive info' to see available observation indices".to_string(),
                )]);
            }
        },
        AccessMethod::AsOf(timestamp) => {
            // Most recent observation at or before the given time.
            match observations
                .iter()
                .filter(|obs| obs.timestamp <= *timestamp)
                .max_by_key(|obs| obs.timestamp)
            {
                Some(obs) => obs,
                None => {
                    return Err(vec![Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::PathNotFound,
                        format!(
                            "No observations found as of {}",
                            timestamp.format("%Y-%m-%d %H:%M:%S UTC")
                        ),
                    )
                    .with_advice(
                        "Try using --after to find the first observation after this time"
                            .to_string(),
                    )]);
                }
            }
        }
        AccessMethod::RightBefore(timestamp) => {
            // Most recent observation strictly before the given time.
            match observations
                .iter()
                .filter(|obs| obs.timestamp < *timestamp)
                .max_by_key(|obs| obs.timestamp)
            {
                Some(obs) => obs,
                None => {
                    return Err(vec![Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::PathNotFound,
                        format!(
                            "No observations found before {}",
                            timestamp.format("%Y-%m-%d %H:%M:%S UTC")
                        ),
                    )
                    .with_advice(
                        "Try using --as-of to include observations at exactly this time"
                            .to_string(),
                    )]);
                }
            }
        }
        AccessMethod::After(timestamp) => {
            // Earliest observation strictly after the given time.
            match observations
                .iter()
                .filter(|obs| obs.timestamp > *timestamp)
                .min_by_key(|obs| obs.timestamp)
            {
                Some(obs) => obs,
                None => {
                    return Err(vec![Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::PathNotFound,
                        format!(
                            "No observations found after {}",
                            timestamp.format("%Y-%m-%d %H:%M:%S UTC")
                        ),
                    )
                    .with_advice(
                        "Try using --as-of to find the most recent observation before or at this time"
                            .to_string(),
                    )]);
                }
            }
        }
        AccessMethod::Latest => observations
            .iter()
            .max_by_key(|obs| obs.timestamp)
            .expect("observations checked non-empty above"),
    };

    // Each timeline entry already carries its fully-replayed state, so the
    // answer is simply a clone of the target's state.
    Ok(target.final_state.clone())
}
/// One observation's identity plus the fully-replayed JSON document state
/// after all of that observation's events have been applied.
#[derive(Debug)]
struct ObservationWithEvents {
    /// Observation id from the "observe"/"snapshot" event; "initial" for
    /// the synthetic entry derived from the header.
    id: String,
    /// Timestamp carried by the event ("created" from the header for the
    /// initial entry).
    timestamp: DateTime<Utc>,
    /// Document state after applying every event up to the next observation.
    final_state: Value,
}
/// Reads an archive file and reconstructs the JSON state at every
/// observation boundary, returning one entry per observation plus a
/// synthetic "initial" entry for the header's starting state.
///
/// Parsing is deliberately lenient: comment lines (starting with '#'),
/// blank lines, unreadable lines, and malformed events are silently
/// skipped so a partially-damaged archive still yields a usable timeline.
fn collect_observations_with_events(
    file_path: &Path,
) -> Result<Vec<ObservationWithEvents>, Vec<Diagnostic>> {
    use std::fs::File;
    use std::io::{BufRead, BufReader};
    let file = match File::open(file_path) {
        Ok(f) => f,
        Err(e) => {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::PathNotFound,
                format!("I couldn't open the archive file: {}", e),
            )]);
        }
    };
    let reader = BufReader::new(file);
    let mut lines = reader.lines();
    let mut observations = Vec::new();
    // Parse header: the first line of the file must be the header object.
    let header_line = match lines.next() {
        Some(Ok(line)) => line,
        _ => {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::EmptyFile,
                "Archive file is empty or unreadable".to_string(),
            )]);
        }
    };
    let header: Value = match serde_json::from_str(&header_line) {
        Ok(h) => h,
        Err(e) => {
            return Err(vec![Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::MissingHeader,
                format!("I couldn't parse the header: {}", e),
            )]);
        }
    };
    // A missing or unparseable "created" field falls back to now() rather
    // than failing, so the initial observation always has a timestamp.
    let created_str = header["created"].as_str().unwrap_or("");
    let created: DateTime<Utc> = match created_str.parse() {
        Ok(dt) => dt,
        Err(_) => Utc::now(),
    };
    let initial_state = header["initial"].clone();
    // Add initial state as observation 0
    observations.push(ObservationWithEvents {
        id: "initial".to_string(),
        timestamp: created,
        final_state: initial_state.clone(),
    });
    let mut current_state = initial_state;
    // Parse events and track state at each observation
    for line in lines {
        let line = match line {
            Ok(l) => l,
            Err(_) => continue,
        };
        if line.trim().starts_with('#') || line.trim().is_empty() {
            continue;
        }
        let event: Value = match serde_json::from_str(&line) {
            Ok(e) => e,
            Err(_) => continue,
        };
        if let Some(arr) = event.as_array() {
            if arr.is_empty() {
                continue;
            }
            let event_type = arr[0].as_str().unwrap_or("");
            if event_type == "observe" && arr.len() >= 4 {
                // ["observe", id, timestamp, change_count]: opens a new
                // observation whose state starts as the current state and
                // is refined by the change events that follow it.
                let obs_id = arr[1].as_str().unwrap_or("").to_string();
                let timestamp_str = arr[2].as_str().unwrap_or("");
                let timestamp: DateTime<Utc> = match timestamp_str.parse() {
                    Ok(dt) => dt,
                    Err(_) => continue,
                };
                observations.push(ObservationWithEvents {
                    id: obs_id,
                    timestamp,
                    final_state: current_state.clone(), // Will be updated as events are applied
                });
            } else if event_type == "snapshot" && arr.len() >= 4 {
                // Handle snapshot events: a snapshot carries the complete
                // document, so it replaces the replayed state outright.
                let obs_id = arr[1].as_str().unwrap_or("").to_string();
                let timestamp_str = arr[2].as_str().unwrap_or("");
                let snapshot_state = arr[3].clone();
                let timestamp: DateTime<Utc> = match timestamp_str.parse() {
                    Ok(dt) => dt,
                    Err(_) => continue,
                };
                current_state = snapshot_state.clone();
                observations.push(ObservationWithEvents {
                    id: obs_id,
                    timestamp,
                    final_state: snapshot_state,
                });
            } else {
                // Apply the event to current_state
                apply_event_to_state(&mut current_state, &arr);
                // Update the final state of the last observation; change
                // events follow the "observe" line they belong to, so the
                // most recently pushed observation is the right target.
                if let Some(last_obs) = observations.last_mut() {
                    last_obs.final_state = current_state.clone();
                }
            }
        }
    }
    Ok(observations)
}
/// Applies a single parsed event (a positional JSON array such as
/// `["change", path, value, obs_id]`) to `state` in place.
///
/// Replay is best-effort: unknown event types and events whose pointer
/// cannot be resolved are silently ignored.
fn apply_event_to_state(state: &mut Value, event: &[Value]) {
    if event.is_empty() {
        return;
    }
    let event_type = event[0].as_str().unwrap_or("");
    match event_type {
        // ["add", path, value, ...]: write `value` at `path`.
        "add" if event.len() >= 3 => {
            let path = event[1].as_str().unwrap_or("");
            let value = event[2].clone();
            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
                let _ = pointer.set(state, value);
            }
        }
        // ["change", path, new_value, ...]: identical mechanics to "add" —
        // the pointer write covers both insertion and replacement.
        "change" if event.len() >= 3 => {
            let path = event[1].as_str().unwrap_or("");
            let value = event[2].clone();
            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
                let _ = pointer.set(state, value);
            }
        }
        // ["remove", path, ...]: delete the value at `path`.
        "remove" if event.len() >= 2 => {
            let path = event[1].as_str().unwrap_or("");
            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
                let _ = pointer.remove(state);
            }
        }
        // ["move", path, [[from, to], ...], ...]: reorder elements of the
        // array at `path`. Each pair is applied sequentially by inserting a
        // copy at the destination and then removing the original.
        "move" if event.len() >= 3 => {
            let path = event[1].as_str().unwrap_or("");
            let moves_value = event[2].clone();
            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
                if let Ok(array_value) = pointer.get(state) {
                    if let Some(array) = array_value.as_array() {
                        let mut arr = array.clone();
                        if let Some(moves) = moves_value.as_array() {
                            for move_pair in moves {
                                if let Some(pair) = move_pair.as_array() {
                                    if pair.len() == 2 {
                                        if let (Some(from_idx), Some(to_idx)) = (
                                            pair[0].as_u64().map(|i| i as usize),
                                            pair[1].as_u64().map(|i| i as usize),
                                        ) {
                                            if from_idx < arr.len() && to_idx <= arr.len() {
                                                let element = arr[from_idx].clone();
                                                arr.insert(to_idx, element);
                                                // The original element shifted right by
                                                // one if the copy landed at or before it.
                                                let remove_idx = if from_idx > to_idx {
                                                    from_idx + 1
                                                } else {
                                                    from_idx
                                                };
                                                // NOTE(review): for a forward move
                                                // (from_idx < to_idx) this insert-then-
                                                // remove sequence leaves the element at
                                                // to_idx - 1 rather than to_idx; confirm
                                                // this matches the diff writer's move
                                                // semantics before relying on replayed
                                                // array order.
                                                if remove_idx < arr.len() {
                                                    arr.remove(remove_idx);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        let _ = pointer.set(state, Value::Array(arr));
                    }
                }
            }
        }
        // Anything else (e.g. "observe" lines, handled by the caller) is a
        // no-op here.
        _ => {}
    }
}

185
src/detection.rs Normal file
View file

@ -0,0 +1,185 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
//! File type detection for JSON archives.
//!
//! This module exists to support ergonomic command-line usage without requiring
//! `--archive=filename` flags. The goal is to infer intent just from filenames:
//!
//! - `json-archive data.json.archive data.json` -> append data.json to existing archive
//! - `json-archive data.json` -> create new archive from data.json
//! - `json-archive data.json.archive.tmp foo.json bar.json` -> append to archive with .tmp suffix
//!
//! Design choice by @nobody. No user requests for this, just seemed nice.
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Detects if a file is a JSON archive by checking file extension or inspecting the header.
///
/// Detection strategy:
/// 1. Check if filename ends with .json.archive
/// 2. Inspect first line for type field as first key with value "@peoplesgrocers/json-archive"
///
/// Strategy 2 was added by @nobody based on frustration with the Elm compiler,
/// which requires specific file extensions (like .js) while build systems often generate
/// temporary files with arbitrary suffixes like .tmp. @nobody thought it would be nice if the CLI
/// was robust enough to handle this.
///
///
/// The magic value "@peoplesgrocers/json-archive" in the type field works as a file
/// signature for cases where the extension isn't what we expect. Not requested by anyone,
/// just anticipating potential tooling conflicts.
/// Detects whether `path` is a JSON archive.
///
/// Two strategies, applied in order:
/// 1. Filename ends with `.json.archive` — accepted without opening the file.
/// 2. The first line parses as a JSON object whose FIRST key is `"type"`
///    with the magic value `"@peoplesgrocers/json-archive"`.
///
/// Strategy 2 exists so build systems that append arbitrary suffixes
/// (`.tmp`, etc.) still get correct detection; the magic value acts as a
/// poor man's file signature.
///
/// NOTE(review): the "first key" check assumes `serde_json::Map` preserves
/// insertion order, which requires serde_json's `preserve_order` feature —
/// confirm it is enabled, otherwise keys iterate in sorted order.
pub fn is_json_archive<P: AsRef<Path>>(path: P) -> Result<bool, std::io::Error> {
    let path = path.as_ref();

    // Fast path: the canonical extension needs no file I/O.
    let has_canonical_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| name.ends_with(".json.archive"));
    if has_canonical_name {
        return Ok(true);
    }

    // Slow path: sniff the first line for the magic header.
    let mut reader = BufReader::new(File::open(path)?);
    let mut first_line = String::new();
    if reader.read_line(&mut first_line)? == 0 {
        // Empty file: definitely not an archive.
        return Ok(false);
    }

    if let Ok(serde_json::Value::Object(header)) = serde_json::from_str(&first_line) {
        if let Some((key, value)) = header.iter().next() {
            if key == "type" {
                if let Some(tag) = value.as_str() {
                    return Ok(tag == "@peoplesgrocers/json-archive");
                }
            }
        }
    }
    Ok(false)
}
#[cfg(test)]
mod tests {
    //! Coverage for both detection strategies: the `.json.archive`
    //! extension fast path and the first-line "type" field sniff.
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;
    #[test]
    fn test_detect_by_json_archive_extension() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".json.archive")?;
        writeln!(temp_file, r#"{{"some": "json"}}"#)?;
        temp_file.flush()?;
        // Extension alone suffices; the content is not an archive header.
        assert!(is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_detect_by_type_field() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".weird-extension")?;
        writeln!(
            temp_file,
            r#"{{"type":"@peoplesgrocers/json-archive","version":1}}"#
        )?;
        temp_file.flush()?;
        assert!(is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_detect_by_type_field_with_tmp_extension() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".json.tmp")?;
        writeln!(
            temp_file,
            r#"{{"type":"@peoplesgrocers/json-archive","version":1}}"#
        )?;
        temp_file.flush()?;
        assert!(is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_not_archive_regular_json() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".json")?;
        writeln!(temp_file, r#"{{"some": "json"}}"#)?;
        temp_file.flush()?;
        assert!(!is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_not_archive_wrong_type_field() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".tmp")?;
        writeln!(temp_file, r#"{{"type":"something-else","version":1}}"#)?;
        temp_file.flush()?;
        assert!(!is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_not_archive_type_not_first_field() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".tmp")?;
        // There is no "type" key at all here — detection must not fire
        // just because the magic value appears under some other key.
        // NOTE(review): despite the test name, this does not exercise a
        // "type key present but not first" document.
        writeln!(
            temp_file,
            r#"{{"version":1,"zzz":"@peoplesgrocers/json-archive"}}"#
        )?;
        temp_file.flush()?;
        // This should NOT be detected as an archive since the type field doesn't exist
        assert!(!is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_not_archive_empty_file() -> Result<(), Box<dyn std::error::Error>> {
        let temp_file = NamedTempFile::with_suffix(".json")?;
        assert!(!is_json_archive(temp_file.path())?);
        Ok(())
    }
    #[test]
    fn test_not_archive_invalid_json() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp_file = NamedTempFile::with_suffix(".tmp")?;
        writeln!(temp_file, "not valid json")?;
        temp_file.flush()?;
        assert!(!is_json_archive(temp_file.path())?);
        Ok(())
    }
}

286
src/diagnostics.rs Normal file
View file

@ -0,0 +1,286 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use std::fmt;
/// Severity of a diagnostic message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticLevel {
    /// Unrecoverable problem; processing cannot continue.
    Fatal,
    /// Suspicious but non-fatal condition.
    Warning,
    /// Purely informational note.
    Info,
}

impl fmt::Display for DiagnosticLevel {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Lowercase labels match conventional compiler-style output
        // ("error: ...", "warning: ...").
        let label = match self {
            DiagnosticLevel::Fatal => "error",
            DiagnosticLevel::Warning => "warning",
            DiagnosticLevel::Info => "info",
        };
        f.write_str(label)
    }
}
/// Stable identifier for every diagnostic the tool can emit; see
/// [`DiagnosticCode::as_str`] for the user-facing E/W code of each variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiagnosticCode {
    // File-level problems (x00x)
    EmptyFile,
    MissingHeader,
    InvalidUtf8,
    TruncatedJson,
    // Header problems (x01x)
    MissingHeaderField,
    UnsupportedVersion,
    InvalidTimestamp,
    InvalidInitialState,
    // Event syntax problems (x02x)
    InvalidEventJson,
    UnknownEventType,
    WrongFieldCount,
    WrongFieldType,
    // Observation bookkeeping (x03x)
    NonExistentObservationId,
    DuplicateObservationId,
    // Change counts (x04x)
    ChangeCountMismatch,
    InvalidChangeCount,
    // JSON Pointer resolution (x05x)
    InvalidPointerSyntax,
    PathNotFound,
    InvalidArrayIndex,
    ArrayIndexOutOfBounds,
    ParentPathNotFound,
    // Value application (x06x)
    TypeMismatch,
    OldValueMismatch,
    // Array move events (x07x)
    MoveOnNonArray,
    MoveIndexOutOfBounds,
    InvalidMoveIndex,
    // Snapshot validation (x08x)
    SnapshotStateMismatch,
    SnapshotTimestampOrder,
}
impl DiagnosticCode {
    /// Returns the stable short code shown to users. The prefix appears to
    /// encode intended severity (E = error, W = warning) and the numeric
    /// range groups related checks, though the actual level is chosen by
    /// whoever constructs the `Diagnostic`.
    pub fn as_str(&self) -> &'static str {
        match self {
            DiagnosticCode::EmptyFile => "E001",
            DiagnosticCode::MissingHeader => "E002",
            DiagnosticCode::InvalidUtf8 => "E003",
            DiagnosticCode::TruncatedJson => "E004",
            DiagnosticCode::MissingHeaderField => "E010",
            DiagnosticCode::UnsupportedVersion => "E011",
            DiagnosticCode::InvalidTimestamp => "W012",
            DiagnosticCode::InvalidInitialState => "E013",
            DiagnosticCode::InvalidEventJson => "E020",
            DiagnosticCode::UnknownEventType => "W021",
            DiagnosticCode::WrongFieldCount => "E022",
            DiagnosticCode::WrongFieldType => "E023",
            DiagnosticCode::NonExistentObservationId => "E030",
            DiagnosticCode::DuplicateObservationId => "W031",
            DiagnosticCode::ChangeCountMismatch => "W040",
            DiagnosticCode::InvalidChangeCount => "E041",
            DiagnosticCode::InvalidPointerSyntax => "E050",
            DiagnosticCode::PathNotFound => "E051",
            DiagnosticCode::InvalidArrayIndex => "E052",
            DiagnosticCode::ArrayIndexOutOfBounds => "E053",
            DiagnosticCode::ParentPathNotFound => "E054",
            DiagnosticCode::TypeMismatch => "E060",
            DiagnosticCode::OldValueMismatch => "W061",
            DiagnosticCode::MoveOnNonArray => "E070",
            DiagnosticCode::MoveIndexOutOfBounds => "E071",
            DiagnosticCode::InvalidMoveIndex => "E072",
            DiagnosticCode::SnapshotStateMismatch => "W080",
            DiagnosticCode::SnapshotTimestampOrder => "W081",
        }
    }
    /// Returns the short human-readable headline for this code, used as
    /// the first line of the rendered diagnostic.
    pub fn title(&self) -> &'static str {
        match self {
            DiagnosticCode::EmptyFile => "Empty file",
            DiagnosticCode::MissingHeader => "Missing header",
            DiagnosticCode::InvalidUtf8 => "Invalid UTF-8 encoding",
            DiagnosticCode::TruncatedJson => "Truncated JSON",
            DiagnosticCode::MissingHeaderField => "Missing required header field",
            DiagnosticCode::UnsupportedVersion => "Unsupported version",
            DiagnosticCode::InvalidTimestamp => "Invalid timestamp",
            DiagnosticCode::InvalidInitialState => "Invalid initial state",
            DiagnosticCode::InvalidEventJson => "Invalid event JSON",
            DiagnosticCode::UnknownEventType => "Unknown event type",
            DiagnosticCode::WrongFieldCount => "Wrong field count",
            DiagnosticCode::WrongFieldType => "Wrong field type",
            DiagnosticCode::NonExistentObservationId => "Non-existent observation ID",
            DiagnosticCode::DuplicateObservationId => "Duplicate observation ID",
            DiagnosticCode::ChangeCountMismatch => "Change count mismatch",
            DiagnosticCode::InvalidChangeCount => "Invalid change count",
            DiagnosticCode::InvalidPointerSyntax => "Invalid JSON Pointer syntax",
            DiagnosticCode::PathNotFound => "Path not found",
            DiagnosticCode::InvalidArrayIndex => "Invalid array index",
            DiagnosticCode::ArrayIndexOutOfBounds => "Array index out of bounds",
            DiagnosticCode::ParentPathNotFound => "Parent path not found",
            DiagnosticCode::TypeMismatch => "Type mismatch",
            DiagnosticCode::OldValueMismatch => "Old value mismatch",
            DiagnosticCode::MoveOnNonArray => "Move operation on non-array",
            DiagnosticCode::MoveIndexOutOfBounds => "Move index out of bounds",
            DiagnosticCode::InvalidMoveIndex => "Invalid move index",
            DiagnosticCode::SnapshotStateMismatch => "Snapshot state mismatch",
            DiagnosticCode::SnapshotTimestampOrder => "Snapshot timestamp out of order",
        }
    }
}
/// A single user-facing report: severity, stable code, and description,
/// with optional source location, offending snippet, and advice.
#[derive(Debug, Clone)]
pub struct Diagnostic {
    /// File the diagnostic refers to, when known.
    pub filename: Option<String>,
    /// Line within `filename`, when known.
    pub line_number: Option<usize>,
    /// Column within the line, when known (only rendered if a line is set).
    pub column: Option<usize>,
    /// Severity; `Fatal` makes `is_fatal()` true.
    pub level: DiagnosticLevel,
    /// Stable code identifying the kind of problem.
    pub code: DiagnosticCode,
    /// Full human-readable description of what went wrong.
    pub description: String,
    /// Optional excerpt of the offending input.
    pub code_snippet: Option<String>,
    /// Optional suggestion for how to fix the problem.
    pub advice: Option<String>,
}
impl Diagnostic {
    /// Creates a diagnostic with only the required parts; location,
    /// snippet, and advice start unset and can be attached with the
    /// builder-style `with_*` methods.
    pub fn new(level: DiagnosticLevel, code: DiagnosticCode, description: String) -> Self {
        Self {
            level,
            code,
            description,
            filename: None,
            line_number: None,
            column: None,
            code_snippet: None,
            advice: None,
        }
    }

    /// Attaches the file and line the diagnostic points at.
    pub fn with_location(self, filename: String, line_number: usize) -> Self {
        Self {
            filename: Some(filename),
            line_number: Some(line_number),
            ..self
        }
    }

    /// Attaches a column within the located line.
    pub fn with_column(self, column: usize) -> Self {
        Self {
            column: Some(column),
            ..self
        }
    }

    /// Attaches the offending source excerpt.
    pub fn with_snippet(self, snippet: String) -> Self {
        Self {
            code_snippet: Some(snippet),
            ..self
        }
    }

    /// Attaches a human-readable suggestion for fixing the problem.
    pub fn with_advice(self, advice: String) -> Self {
        Self {
            advice: Some(advice),
            ..self
        }
    }

    /// True when this diagnostic should abort processing.
    pub fn is_fatal(&self) -> bool {
        matches!(self.level, DiagnosticLevel::Fatal)
    }
}
impl fmt::Display for Diagnostic {
    /// Renders the diagnostic as an optional "file:line[:col] - " prefix,
    /// a "level CODE: Title" headline, the description, and then any
    /// snippet and advice, each separated by a blank line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Location prefix is printed only when both file and line exist;
        // the column is included when it is also known.
        match (&self.filename, self.line_number, self.column) {
            (Some(name), Some(line), Some(col)) => write!(f, "{}:{}:{} - ", name, line, col)?,
            (Some(name), Some(line), None) => write!(f, "{}:{} - ", name, line)?,
            _ => {}
        }
        writeln!(
            f,
            "{} {}: {}",
            self.level,
            self.code.as_str(),
            self.code.title()
        )?;
        writeln!(f)?;
        writeln!(f, "{}", self.description)?;
        // Optional sections, in fixed order: snippet first, then advice.
        for section in [&self.code_snippet, &self.advice].into_iter().flatten() {
            writeln!(f)?;
            writeln!(f, "{}", section)?;
        }
        Ok(())
    }
}
/// Accumulates diagnostics produced during a pass so callers can report
/// them in bulk and check whether any fatal one occurred.
#[derive(Debug, Default)]
pub struct DiagnosticCollector {
    // Diagnostics in the order they were added.
    diagnostics: Vec<Diagnostic>,
}
impl DiagnosticCollector {
pub fn new() -> Self {
Self {
diagnostics: Vec::new(),
}
}
pub fn add(&mut self, diagnostic: Diagnostic) {
self.diagnostics.push(diagnostic);
}
pub fn has_fatal(&self) -> bool {
self.diagnostics.iter().any(|d| d.is_fatal())
}
pub fn diagnostics(&self) -> &[Diagnostic] {
&self.diagnostics
}
pub fn into_diagnostics(self) -> Vec<Diagnostic> {
self.diagnostics
}
pub fn is_empty(&self) -> bool {
self.diagnostics.is_empty()
}
pub fn len(&self) -> usize {
self.diagnostics.len()
}
}

345
src/diff.rs Normal file
View file

@ -0,0 +1,345 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use serde_json::Value;
use std::collections::{HashMap, HashSet};
use crate::events::Event;
/// Computes the events that transform `old` into `new`, rooted at
/// `base_path`, tagging each event with `observation_id`.
pub fn diff(old: &Value, new: &Value, base_path: &str, observation_id: &str) -> Vec<Event> {
    // Entry point: delegate to the recursive walker with a fresh accumulator.
    let mut events = Vec::new();
    diff_recursive(old, new, base_path, observation_id, &mut events);
    events
}
/// Walks `old` and `new` in parallel, appending the events needed to turn
/// `old` into `new` at `path` onto `result`.
fn diff_recursive(
    old: &Value,
    new: &Value,
    path: &str,
    observation_id: &str,
    result: &mut Vec<Event>,
) {
    match (old, new) {
        // Matching containers descend into the structure-aware differs.
        (Value::Object(prev), Value::Object(next)) => {
            diff_objects(prev, next, path, observation_id, result)
        }
        (Value::Array(prev), Value::Array(next)) => {
            diff_arrays(prev, next, path, observation_id, result)
        }
        // Scalars — or a type change between containers — produce a single
        // Change event whenever the two values differ.
        (prev, next) => {
            if prev == next {
                return;
            }
            result.push(Event::Change {
                path: path.to_string(),
                new_value: next.clone(),
                observation_id: observation_id.to_string(),
            });
        }
    }
}
/// Emits Remove / Add / recursive-Change events describing how object `old`
/// must change to become object `new`, for keys rooted at `base_path`.
///
/// Keys in each category are processed in sorted order so that the emitted
/// event stream is deterministic. Previously the `HashSet` difference /
/// intersection views were iterated directly, whose order varies between
/// runs — making archive output non-reproducible for identical inputs.
fn diff_objects(
    old: &serde_json::Map<String, Value>,
    new: &serde_json::Map<String, Value>,
    base_path: &str,
    observation_id: &str,
    result: &mut Vec<Event>,
) {
    let old_keys: HashSet<&String> = old.keys().collect();
    let new_keys: HashSet<&String> = new.keys().collect();

    // Removed keys (present in old, absent in new)
    let mut removed: Vec<&String> = old_keys.difference(&new_keys).copied().collect();
    removed.sort();
    for key in removed {
        let path = format_path(base_path, key);
        result.push(Event::Remove {
            path,
            observation_id: observation_id.to_string(),
        });
    }

    // Added keys (absent in old, present in new)
    let mut added: Vec<&String> = new_keys.difference(&old_keys).copied().collect();
    added.sort();
    for key in added {
        let path = format_path(base_path, key);
        result.push(Event::Add {
            path,
            value: new[key].clone(),
            observation_id: observation_id.to_string(),
        });
    }

    // Keys present on both sides: recurse to find nested changes.
    let mut common: Vec<&String> = old_keys.intersection(&new_keys).copied().collect();
    common.sort();
    for key in common {
        let old_value = &old[key];
        let new_value = &new[key];
        if old_value != new_value {
            let path = format_path(base_path, key);
            diff_recursive(old_value, new_value, &path, observation_id, result);
        }
    }
}
/// Diffs two JSON arrays by content identity, emitting Change events for
/// matched-but-modified items, one Move event for relocated items, and
/// Remove/Add events for items present on only one side.
///
/// NOTE(review): matching is keyed on `value_hash`, so equal duplicate
/// elements collapse onto one key — extra duplicates surface as Add/Remove
/// pairs rather than moves. Also, events are emitted in HashMap/HashSet
/// iteration order, which is not deterministic between runs; confirm
/// consumers don't depend on event ordering.
fn diff_arrays(
    old: &[Value],
    new: &[Value],
    base_path: &str,
    observation_id: &str,
    result: &mut Vec<Event>,
) {
    // Simple implementation: we'll use a more sophisticated approach for move detection
    let mut old_items: HashMap<String, (usize, &Value)> = HashMap::new();
    let mut new_items: HashMap<String, (usize, &Value)> = HashMap::new();
    // Create content-based indices for move detection
    for (i, value) in old.iter().enumerate() {
        let key = value_hash(value);
        old_items.insert(key, (i, value));
    }
    for (i, value) in new.iter().enumerate() {
        let key = value_hash(value);
        new_items.insert(key, (i, value));
    }
    let old_keys: HashSet<&String> = old_items.keys().collect();
    let new_keys: HashSet<&String> = new_items.keys().collect();
    let common_keys: HashSet<&String> = old_keys.intersection(&new_keys).cloned().collect();
    // Track which items have been processed
    let mut processed_old: HashSet<usize> = HashSet::new();
    let mut processed_new: HashSet<usize> = HashSet::new();
    // Handle items that exist in both arrays (potential moves or unchanged)
    let mut moves: Vec<(usize, usize)> = Vec::new();
    for key in &common_keys {
        let (old_idx, old_val) = old_items[*key];
        let (new_idx, new_val) = new_items[*key];
        processed_old.insert(old_idx);
        processed_new.insert(new_idx);
        if old_val != new_val {
            // Value changed
            // NOTE(review): keys are the Debug rendering of the value, so
            // matched entries should always compare equal and this branch
            // looks unreachable — confirm.
            let path = format!("{}/{}", base_path, new_idx);
            result.push(Event::Change {
                path,
                new_value: new_val.clone(),
                observation_id: observation_id.to_string(),
            });
        } else if old_idx != new_idx {
            // Same value, different position - this is a move
            moves.push((old_idx, new_idx));
        }
    }
    // Generate move events if any
    if !moves.is_empty() {
        // Sort moves by original position to ensure consistent ordering
        moves.sort_by_key(|(old_idx, _)| *old_idx);
        result.push(Event::Move {
            path: base_path.to_string(),
            moves,
            observation_id: observation_id.to_string(),
        });
    }
    // Handle removed items (in old but not in new)
    let mut removed_indices: Vec<usize> = (0..old.len())
        .filter(|i| !processed_old.contains(i))
        .collect();
    // Remove from highest index to lowest to avoid index shifting issues
    removed_indices.sort_by(|a, b| b.cmp(a));
    for idx in removed_indices {
        let path = format!("{}/{}", base_path, idx);
        result.push(Event::Remove {
            path,
            observation_id: observation_id.to_string(),
        });
    }
    // Handle added items (in new but not in old)
    for i in 0..new.len() {
        if !processed_new.contains(&i) {
            let path = format!("{}/{}", base_path, i);
            result.push(Event::Add {
                path,
                value: new[i].clone(),
                observation_id: observation_id.to_string(),
            });
        }
    }
}
/// Appends one object key to a JSON Pointer, escaping per RFC 6901.
///
/// '~' must be escaped to "~0" BEFORE '/' becomes "~1", otherwise the
/// tilde introduced by the slash escape would be double-escaped.
fn format_path(base: &str, segment: &str) -> String {
    let escaped = segment.replace('~', "~0").replace('/', "~1");
    match base {
        "" => format!("/{}", escaped),
        prefix => format!("{}/{}", prefix, escaped),
    }
}
/// Builds a content-identity key for an array element, used by
/// `diff_arrays` to match elements across the old and new arrays.
///
/// This is the element's `Debug` rendering rather than a real hash: it
/// allocates a full string per element, and equal duplicate elements map
/// to the same key (so duplicates cannot be told apart by the caller).
fn value_hash(value: &Value) -> String {
    // Simple content-based hash for move detection
    // In a real implementation, you might want a more sophisticated hash
    format!("{:?}", value)
}
#[cfg(test)]
mod tests {
    //! Unit tests for the diff generator. Array-diff tests assert loosely
    //! on event kinds because `diff_arrays` does not guarantee event order.
    use super::*;
    use serde_json::json;
    #[test]
    fn test_object_add() {
        let old = json!({"a": 1});
        let new = json!({"a": 1, "b": 2});
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 1);
        match &result[0] {
            Event::Add { path, value, .. } => {
                assert_eq!(path, "/b");
                assert_eq!(value, &json!(2));
            }
            _ => panic!("Expected Add event"),
        }
    }
    #[test]
    fn test_object_remove() {
        let old = json!({"a": 1, "b": 2});
        let new = json!({"a": 1});
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 1);
        match &result[0] {
            Event::Remove { path, .. } => {
                assert_eq!(path, "/b");
            }
            _ => panic!("Expected Remove event"),
        }
    }
    #[test]
    fn test_object_change() {
        let old = json!({"a": 1});
        let new = json!({"a": 2});
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 1);
        match &result[0] {
            Event::Change {
                path, new_value, ..
            } => {
                assert_eq!(path, "/a");
                assert_eq!(new_value, &json!(2));
            }
            _ => panic!("Expected Change event"),
        }
    }
    #[test]
    fn test_nested_object() {
        // Changes inside nested objects must carry the full pointer path.
        let old = json!({"user": {"name": "Alice", "age": 30}});
        let new = json!({"user": {"name": "Alice", "age": 31}});
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 1);
        match &result[0] {
            Event::Change {
                path, new_value, ..
            } => {
                assert_eq!(path, "/user/age");
                assert_eq!(new_value, &json!(31));
            }
            _ => panic!("Expected Change event"),
        }
    }
    #[test]
    fn test_array_add() {
        let old = json!(["a", "b"]);
        let new = json!(["a", "b", "c"]);
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 1);
        match &result[0] {
            Event::Add { path, value, .. } => {
                assert_eq!(path, "/2");
                assert_eq!(value, &json!("c"));
            }
            _ => panic!("Expected Add event"),
        }
    }
    #[test]
    fn test_array_remove() {
        let old = json!(["a", "b", "c"]);
        let new = json!(["a", "b"]);
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 1);
        match &result[0] {
            Event::Remove { path, .. } => {
                assert_eq!(path, "/2");
            }
            _ => panic!("Expected Remove event"),
        }
    }
    #[test]
    fn test_array_move() {
        let old = json!(["a", "b", "c"]);
        let new = json!(["c", "a", "b"]);
        let result = diff(&old, &new, "", "obs-1");
        // Should generate move events
        assert!(!result.is_empty());
        // Check if we have a move event
        let has_move = result.iter().any(|e| matches!(e, Event::Move { .. }));
        assert!(has_move, "Expected at least one Move event");
    }
    #[test]
    fn test_escape_sequences_in_keys() {
        // Keys containing '/' and '~' must be escaped per RFC 6901.
        let old = json!({});
        let new = json!({"foo/bar": "value", "foo~bar": "value2"});
        let result = diff(&old, &new, "", "obs-1");
        assert_eq!(result.len(), 2);
        let paths: Vec<&String> = result
            .iter()
            .filter_map(|e| match e {
                Event::Add { path, .. } => Some(path),
                _ => None,
            })
            .collect();
        assert!(paths.contains(&&"/foo~1bar".to_string()));
        assert!(paths.contains(&&"/foo~0bar".to_string()));
    }
}

452
src/events.rs Normal file
View file

@ -0,0 +1,452 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use chrono::{DateTime, Utc};
use serde::de::{self, SeqAccess, Visitor};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::Value;
/// First line of every archive file, serialized as a single JSON object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Header {
    /// Type identifier for magic file detection. We put this first to act as a "poor man's
    /// magic file number detection" when the archive file has an unexpected extension.
    /// This helps avoid issues like the Elm compiler that requires specific file extensions
    /// (e.g., .js) which doesn't play nice with build systems using temporary files with
    /// arbitrary suffixes. By putting this key first, we can detect archive files even
    /// when they have non-standard extensions like .tmp appended by build tools.
    #[serde(rename = "type")]
    pub file_type: String,
    /// Archive format version; `Header::new` sets this to 1.
    pub version: u32,
    /// Creation timestamp; `Header::new` stamps it with `Utc::now()`.
    pub created: DateTime<Utc>,
    /// Optional provenance of the initial document (omitted from JSON when absent).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
    /// The first observed JSON document; subsequent events diff against this.
    pub initial: Value,
    /// Optional free-form metadata (omitted from JSON when absent).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Value>,
}
impl Header {
pub fn new(initial: Value, source: Option<String>) -> Self {
Self {
file_type: "@peoplesgrocers/json-archive".to_string(),
version: 1,
created: Utc::now(),
source,
initial,
metadata: None,
}
}
}
/// A single line in the archive's event log. Each variant serializes as a
/// positional JSON array whose first element is the event-type tag (see
/// the `Serialize` impl for the exact wire layout).
#[derive(Debug, Clone)]
pub enum Event {
    /// Marks the start of an observation:
    /// `["observe", observation_id, timestamp, change_count]`.
    Observe {
        observation_id: String,
        timestamp: DateTime<Utc>,
        change_count: usize,
    },
    /// A value inserted at `path`: `["add", path, value, observation_id]`.
    Add {
        path: String,
        value: Value,
        observation_id: String,
    },
    /// A value replaced at `path`: `["change", path, new_value, observation_id]`.
    Change {
        path: String,
        new_value: Value,
        observation_id: String,
    },
    /// A value deleted at `path`: `["remove", path, observation_id]`.
    Remove {
        path: String,
        observation_id: String,
    },
    /// Array elements repositioned:
    /// `["move", path, [[from, to], ...], observation_id]`.
    Move {
        path: String,
        moves: Vec<(usize, usize)>,
        observation_id: String,
    },
    /// A full copy of the document:
    /// `["snapshot", observation_id, timestamp, object]`.
    Snapshot {
        observation_id: String,
        timestamp: DateTime<Utc>,
        object: Value,
    },
}
impl Serialize for Event {
    /// Encodes the event as a compact JSON array: the tag string first,
    /// followed by the variant's fields in declaration order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        use serde::ser::SerializeSeq;

        match self {
            Event::Observe {
                observation_id: id,
                timestamp: ts,
                change_count: count,
            } => {
                let mut out = serializer.serialize_seq(Some(4))?;
                out.serialize_element("observe")?;
                out.serialize_element(id)?;
                out.serialize_element(ts)?;
                out.serialize_element(count)?;
                out.end()
            }
            Event::Add {
                path,
                value,
                observation_id: id,
            } => {
                let mut out = serializer.serialize_seq(Some(4))?;
                out.serialize_element("add")?;
                out.serialize_element(path)?;
                out.serialize_element(value)?;
                out.serialize_element(id)?;
                out.end()
            }
            Event::Change {
                path,
                new_value,
                observation_id: id,
            } => {
                let mut out = serializer.serialize_seq(Some(4))?;
                out.serialize_element("change")?;
                out.serialize_element(path)?;
                out.serialize_element(new_value)?;
                out.serialize_element(id)?;
                out.end()
            }
            Event::Remove {
                path,
                observation_id: id,
            } => {
                // Remove carries no payload, so it is a 3-element array.
                let mut out = serializer.serialize_seq(Some(3))?;
                out.serialize_element("remove")?;
                out.serialize_element(path)?;
                out.serialize_element(id)?;
                out.end()
            }
            Event::Move {
                path,
                moves,
                observation_id: id,
            } => {
                let mut out = serializer.serialize_seq(Some(4))?;
                out.serialize_element("move")?;
                out.serialize_element(path)?;
                out.serialize_element(moves)?;
                out.serialize_element(id)?;
                out.end()
            }
            Event::Snapshot {
                observation_id: id,
                timestamp: ts,
                object,
            } => {
                let mut out = serializer.serialize_seq(Some(4))?;
                out.serialize_element("snapshot")?;
                out.serialize_element(id)?;
                out.serialize_element(ts)?;
                out.serialize_element(object)?;
                out.end()
            }
        }
    }
}
struct EventVisitor;
impl<'de> Visitor<'de> for EventVisitor {
type Value = Event;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a JSON array representing an Event")
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let event_type: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("event type"))?;
match event_type.as_str() {
"observe" => {
let observation_id: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("observation_id"))?;
let timestamp: DateTime<Utc> = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("timestamp"))?;
let change_count: usize = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("change_count"))?;
Ok(Event::Observe {
observation_id,
timestamp,
change_count,
})
}
"add" => {
let path: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("path"))?;
let value: Value = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("value"))?;
let observation_id: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("observation_id"))?;
Ok(Event::Add {
path,
value,
observation_id,
})
}
"change" => {
let path: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("path"))?;
let new_value: Value = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("new_value"))?;
let observation_id: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("observation_id"))?;
Ok(Event::Change {
path,
new_value,
observation_id,
})
}
"remove" => {
let path: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("path"))?;
let observation_id: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("observation_id"))?;
Ok(Event::Remove {
path,
observation_id,
})
}
"move" => {
let path: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("path"))?;
let moves: Vec<(usize, usize)> = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("moves"))?;
let observation_id: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("observation_id"))?;
Ok(Event::Move {
path,
moves,
observation_id,
})
}
"snapshot" => {
let observation_id: String = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("observation_id"))?;
let timestamp: DateTime<Utc> = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("timestamp"))?;
let object: Value = seq
.next_element()?
.ok_or_else(|| de::Error::missing_field("object"))?;
Ok(Event::Snapshot {
observation_id,
timestamp,
object,
})
}
_ => Err(de::Error::unknown_variant(
&event_type,
&["observe", "add", "change", "remove", "move", "snapshot"],
)),
}
}
}
impl<'de> Deserialize<'de> for Event {
    /// Events are stored as JSON arrays, so delegate to the sequence visitor.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        deserializer.deserialize_seq(EventVisitor)
    }
}
/// A group of change events recorded together at one point in time.
#[derive(Debug, Clone)]
pub struct Observation {
    /// Identifier for this observation; emitted as the `Observe` marker's
    /// observation_id by `to_events`.
    pub id: String,
    /// When the observation was taken.
    pub timestamp: DateTime<Utc>,
    /// The change events collected under this observation.
    pub events: Vec<Event>,
}
impl Observation {
pub fn new(id: String, timestamp: DateTime<Utc>) -> Self {
Self {
id,
timestamp,
events: Vec::new(),
}
}
pub fn add_event(&mut self, event: Event) {
self.events.push(event);
}
pub fn to_events(self) -> Vec<Event> {
let mut result = vec![Event::Observe {
observation_id: self.id.clone(),
timestamp: self.timestamp,
change_count: self.events.len(),
}];
result.extend(self.events);
result
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Headers must survive a JSON round-trip with their stable fields intact.
    #[test]
    fn test_header_serialization() {
        let header = Header::new(json!({"test": "value"}), Some("test-source".to_string()));
        let serialized = serde_json::to_string(&header).unwrap();
        let deserialized: Header = serde_json::from_str(&serialized).unwrap();
        assert_eq!(header.file_type, deserialized.file_type);
        assert_eq!(header.version, deserialized.version);
        assert_eq!(header.initial, deserialized.initial);
        assert_eq!(header.source, deserialized.source);
    }

    // Events must serialize to tagged JSON arrays and round-trip losslessly.
    #[test]
    fn test_event_serialization() {
        let timestamp = Utc::now();
        // Test observe event
        let observe_event = Event::Observe {
            observation_id: "obs-1".to_string(),
            timestamp,
            change_count: 2,
        };
        let serialized = serde_json::to_string(&observe_event).unwrap();
        let expected_array = json!(["observe", "obs-1", timestamp, 2]);
        assert_eq!(
            serde_json::from_str::<Value>(&serialized).unwrap(),
            expected_array
        );
        // Test add event
        let add_event = Event::Add {
            path: "/test".to_string(),
            value: json!("value"),
            observation_id: "obs-1".to_string(),
        };
        let serialized = serde_json::to_string(&add_event).unwrap();
        let expected_array = json!(["add", "/test", "value", "obs-1"]);
        assert_eq!(
            serde_json::from_str::<Value>(&serialized).unwrap(),
            expected_array
        );
        // Test all event types for serialization/deserialization round-trip
        let events = vec![
            Event::Observe {
                observation_id: "obs-1".to_string(),
                timestamp,
                change_count: 2,
            },
            Event::Add {
                path: "/test".to_string(),
                value: json!("value"),
                observation_id: "obs-1".to_string(),
            },
            Event::Change {
                path: "/test".to_string(),
                new_value: json!("new"),
                observation_id: "obs-1".to_string(),
            },
            Event::Remove {
                path: "/test".to_string(),
                observation_id: "obs-1".to_string(),
            },
            Event::Move {
                path: "/items".to_string(),
                moves: vec![(0, 1)],
                observation_id: "obs-1".to_string(),
            },
            Event::Snapshot {
                observation_id: "snap-1".to_string(),
                timestamp,
                object: json!({"test": "state"}),
            },
        ];
        for event in events {
            let serialized = serde_json::to_string(&event).unwrap();
            // Verify it's serialized as an array
            let as_value: Value = serde_json::from_str(&serialized).unwrap();
            assert!(as_value.is_array(), "Event should serialize to JSON array");
            // Verify round-trip serialization/deserialization works
            let deserialized: Event = serde_json::from_str(&serialized).unwrap();
            let reserialized = serde_json::to_string(&deserialized).unwrap();
            assert_eq!(
                serialized, reserialized,
                "Round-trip serialization should be identical"
            );
        }
    }

    // to_events must prepend an Observe marker with the correct change count.
    #[test]
    fn test_observation_to_events() {
        let mut obs = Observation::new("obs-1".to_string(), Utc::now());
        obs.add_event(Event::Add {
            path: "/test".to_string(),
            value: json!("value"),
            observation_id: "obs-1".to_string(),
        });
        obs.add_event(Event::Change {
            path: "/test".to_string(),
            new_value: json!("new"),
            observation_id: "obs-1".to_string(),
        });
        let events = obs.to_events();
        assert_eq!(events.len(), 3); // observe + 2 events
        match &events[0] {
            Event::Observe { change_count, .. } => assert_eq!(*change_count, 2),
            _ => panic!("First event should be observe"),
        }
    }
}

71
src/flags.rs Normal file
View file

@ -0,0 +1,71 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use std::path::PathBuf;
// Command-line interface definition, expanded by the `xflags` macro into the
// `JsonArchive` type that main.rs consumes via `from_env_or_exit()`.
//
// NOTE(review): the `///` lines inside the macro body appear to double as the
// generated --help text (xflags convention), so edit them with the user-facing
// CLI output in mind.
xflags::xflags! {
    cmd json-archive {
        default cmd create {
            /// Input JSON files in chronological order (first file determines default output name)
            repeated inputs: PathBuf
            /// Output archive file path (defaults to first input + .json.archive)
            optional -o, --output output: PathBuf
            /// Insert snapshot every N observations (optional)
            optional -s, --snapshot-interval snapshot_interval: usize
            /// Source identifier for archive metadata
            optional --source source: String
        }
        cmd info {
            /// Archive file to show information about
            required file: PathBuf
            /// Output format: human-readable (default) or json
            optional --output output: String
        }
        cmd state {
            /// Archive file to read state from
            required file: PathBuf
            /// Get state at specific observation ID
            optional --id id: String
            /// Get state at Nth observation in file order (not chronological)
            optional --index index: usize
            /// Get state as of this timestamp (most recent observation <= timestamp)
            optional --as-of as_of: String
            /// Get state right before this timestamp (most recent observation < timestamp)
            optional --before before: String
            /// Get state after this timestamp (earliest observation > timestamp)
            optional --after after: String
            /// Get latest state by timestamp (default if no other flags specified)
            optional --latest latest: bool
        }
    }
}

38
src/lib.rs Normal file
View file

@ -0,0 +1,38 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
// Public modules making up the json-archive crate.
pub mod archive;
pub mod detection;
pub mod diagnostics;
pub mod diff;
pub mod events;
pub mod flags;
pub mod pointer;
pub mod reader;

// Flat re-exports so callers can write e.g. `json_archive::ArchiveWriter`
// instead of spelling out the module path.
pub use archive::{
    append_to_archive, create_archive_from_files, default_output_filename, ArchiveBuilder, ArchiveWriter,
};
pub use detection::is_json_archive;
pub use diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
pub use events::{Event, Header, Observation};
pub use pointer::JsonPointer;
pub use reader::{ArchiveReader, ReadMode, ReadResult};

152
src/main.rs Normal file
View file

@ -0,0 +1,152 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use json_archive::{
    append_to_archive, create_archive_from_files, default_output_filename, is_json_archive,
    Diagnostic, DiagnosticCode, DiagnosticLevel,
};
use std::path::{Path, PathBuf};
use std::process;
mod cmd;
mod flags;
/// Entry point: parse the CLI, run the subcommand, report diagnostics to
/// stderr, and exit non-zero when any diagnostic is fatal.
fn main() {
    let diagnostics = run(flags::JsonArchive::from_env_or_exit());

    // Single pass: print every diagnostic while remembering fatality.
    let mut fatal = false;
    for diagnostic in &diagnostics {
        eprintln!("{}", diagnostic);
        fatal = fatal || diagnostic.is_fatal();
    }

    if fatal {
        process::exit(1);
    }
}
/// Dispatches the parsed CLI subcommand, returning any diagnostics produced.
fn run(flags: flags::JsonArchive) -> Vec<Diagnostic> {
    match flags.subcommand {
        flags::JsonArchiveCmd::Create(create_flags) => create_archive(&create_flags),
        flags::JsonArchiveCmd::Info(info_flags) => cmd::info::run(&info_flags),
        flags::JsonArchiveCmd::State(state_flags) => cmd::state::run(&state_flags),
    }
}
fn create_archive(flags: &flags::Create) -> Vec<Diagnostic> {
if flags.inputs.is_empty() {
return vec![Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MissingHeaderField,
"I need at least one JSON file to create an archive, but you didn't provide any."
.to_string(),
)
.with_advice(
"Usage: json-archive <file1.json> [file2.json ...]\n\n\
The first file will be used as the initial state, and subsequent files \
will be compared to generate change events."
.to_string(),
)];
}
let output_path = match &flags.output {
Some(path) => path.clone(),
None => default_output_filename(&flags.inputs[0]),
};
let mut diagnostics = Vec::new();
for input_path in &flags.inputs {
if !Path::new(input_path).exists() {
diagnostics.push(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::PathNotFound,
format!("I couldn't find the input file: {}", input_path.display()),
)
.with_advice(
"Make sure the file path is correct and the file exists. \
Check for typos in the filename."
.to_string(),
),
);
}
}
if !diagnostics.is_empty() {
return diagnostics;
}
let first_is_archive = match is_json_archive(&flags.inputs[0]) {
Ok(is_archive) => is_archive,
Err(e) => {
return vec![Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::PathNotFound,
format!("I couldn't check if the first file is an archive: {}", e),
)];
}
};
if first_is_archive {
println!("First input appears to be a JSON archive file");
if flags.inputs.len() == 1 {
return vec![
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MissingHeaderField,
"I found that the first input is already an archive file, but you didn't provide any additional JSON files to append.".to_string()
)
.with_advice(
"If you want to append to an archive, provide additional JSON files:\n\
json-archive existing.json.archive new1.json new2.json"
.to_string()
)
];
}
return append_to_archive(&flags.inputs[0], &flags.inputs[1..], &output_path, flags.source.clone(), flags.snapshot_interval);
}
println!("Creating archive: {}", output_path.display());
println!("Input files: {:?}", flags.inputs);
if let Some(interval) = flags.snapshot_interval {
println!("Snapshot interval: every {} observations", interval);
}
if let Some(ref source) = flags.source {
println!("Source: {}", source);
}
match create_archive_from_files(
&flags.inputs,
output_path.clone(),
flags.source.clone(),
flags.snapshot_interval,
) {
Ok(()) => {
println!("Archive created successfully: {}", output_path.display());
Vec::new()
}
Err(diagnostics) => diagnostics,
}
}

414
src/pointer.rs Normal file
View file

@ -0,0 +1,414 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
use serde_json::Value;
/// A parsed RFC 6901 JSON Pointer, stored as its unescaped reference tokens.
#[derive(Debug, Clone, PartialEq)]
pub struct JsonPointer {
    /// Unescaped path segments; empty means the pointer addresses the root.
    tokens: Vec<String>,
}
impl JsonPointer {
    /// Parses an RFC 6901 JSON Pointer. The empty string addresses the whole
    /// document; any other pointer must start with '/'.
    ///
    /// # Errors
    /// Returns an `InvalidPointerSyntax` diagnostic when a non-empty path does
    /// not begin with '/'.
    pub fn new(path: &str) -> Result<Self, Diagnostic> {
        if path.is_empty() {
            return Ok(JsonPointer { tokens: vec![] });
        }
        if !path.starts_with('/') {
            return Err(Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::InvalidPointerSyntax,
                format!(
                    "I couldn't parse the path '{}': Path must start with '/'",
                    path
                ),
            ));
        }
        // Unescape each token in RFC 6901 order: "~1" -> "/" first, then
        // "~0" -> "~" (reversing this would corrupt a literal "~1" escape).
        let tokens = path[1..]
            .split('/')
            .map(|token| token.replace("~1", "/").replace("~0", "~"))
            .collect();
        Ok(JsonPointer { tokens })
    }

    /// Resolves this pointer against `value`, returning the referenced node.
    ///
    /// # Errors
    /// `PathNotFound` for a missing key or out-of-range index,
    /// `InvalidArrayIndex` for a non-numeric token applied to an array, and
    /// `TypeMismatch` when indexing into a scalar.
    pub fn get<'a>(&self, value: &'a Value) -> Result<&'a Value, Diagnostic> {
        let mut current = value;
        for token in &self.tokens {
            match current {
                Value::Object(obj) => {
                    current = obj.get(token).ok_or_else(|| {
                        Diagnostic::new(
                            DiagnosticLevel::Fatal,
                            DiagnosticCode::PathNotFound,
                            format!("I couldn't find the key '{}'", token),
                        )
                    })?;
                }
                Value::Array(arr) => {
                    let index = token.parse::<usize>().map_err(|_| {
                        Diagnostic::new(
                            DiagnosticLevel::Fatal,
                            DiagnosticCode::InvalidArrayIndex,
                            format!("I couldn't parse '{}' as an array index", token),
                        )
                    })?;
                    current = arr.get(index).ok_or_else(|| {
                        Diagnostic::new(
                            DiagnosticLevel::Fatal,
                            DiagnosticCode::PathNotFound,
                            format!(
                                "I couldn't find index {} (array length is {})",
                                index,
                                arr.len()
                            ),
                        )
                    })?;
                }
                _ => {
                    return Err(Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::TypeMismatch,
                        format!(
                            "I can't index into {} with '{}'",
                            current.type_name(),
                            token
                        ),
                    ));
                }
            }
        }
        Ok(current)
    }

    /// Walks every token except the last, returning a mutable reference to
    /// the parent container the final token addresses. Shared traversal for
    /// `set` and `remove` (which previously duplicated this loop verbatim).
    ///
    /// Callers must ensure the pointer is non-empty; both `set` and `remove`
    /// guard the empty case before calling (an empty pointer would panic on
    /// the `len() - 1` slice below).
    fn descend_mut<'a>(&self, value: &'a mut Value) -> Result<&'a mut Value, Diagnostic> {
        let mut current = value;
        for token in &self.tokens[..self.tokens.len() - 1] {
            match current {
                Value::Object(obj) => {
                    current = obj.get_mut(token).ok_or_else(|| {
                        Diagnostic::new(
                            DiagnosticLevel::Fatal,
                            DiagnosticCode::PathNotFound,
                            format!("I couldn't find the key '{}'", token),
                        )
                    })?;
                }
                Value::Array(arr) => {
                    let index = token.parse::<usize>().map_err(|_| {
                        Diagnostic::new(
                            DiagnosticLevel::Fatal,
                            DiagnosticCode::InvalidArrayIndex,
                            format!("I couldn't parse '{}' as an array index", token),
                        )
                    })?;
                    // Capture the length before get_mut borrows arr mutably.
                    let array_len = arr.len();
                    current = arr.get_mut(index).ok_or_else(|| {
                        Diagnostic::new(
                            DiagnosticLevel::Fatal,
                            DiagnosticCode::PathNotFound,
                            format!(
                                "I couldn't find index {} (array length is {})",
                                index, array_len
                            ),
                        )
                    })?;
                }
                _ => {
                    return Err(Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::TypeMismatch,
                        format!(
                            "I can't index into {} with '{}'",
                            current.type_name(),
                            token
                        ),
                    ));
                }
            }
        }
        Ok(current)
    }

    /// Sets the value at this pointer. An empty pointer replaces the root;
    /// for arrays, an index equal to the current length appends an element.
    ///
    /// # Errors
    /// `PathNotFound` for missing intermediate keys/indices or an index past
    /// the end, `InvalidArrayIndex` for non-numeric array tokens, and
    /// `TypeMismatch` when targeting a scalar.
    pub fn set(&self, value: &mut Value, new_value: Value) -> Result<(), Diagnostic> {
        if self.tokens.is_empty() {
            *value = new_value;
            return Ok(());
        }
        let last_token = &self.tokens[self.tokens.len() - 1];
        let parent = self.descend_mut(value)?;
        match parent {
            Value::Object(obj) => {
                obj.insert(last_token.clone(), new_value);
            }
            Value::Array(arr) => {
                let index = last_token.parse::<usize>().map_err(|_| {
                    Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::InvalidArrayIndex,
                        format!("I couldn't parse '{}' as an array index", last_token),
                    )
                })?;
                if index == arr.len() {
                    // Writing one past the end appends a new element.
                    arr.push(new_value);
                } else if index < arr.len() {
                    arr[index] = new_value;
                } else {
                    return Err(Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::PathNotFound,
                        format!(
                            "I couldn't set index {} (array length is {})",
                            index,
                            arr.len()
                        ),
                    ));
                }
            }
            _ => {
                return Err(Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::TypeMismatch,
                    format!(
                        "I can't set property '{}' on {}",
                        last_token,
                        parent.type_name()
                    ),
                ));
            }
        }
        Ok(())
    }

    /// Removes and returns the value at this pointer.
    ///
    /// # Errors
    /// The root (empty pointer) cannot be removed; missing keys/indices yield
    /// `PathNotFound`, non-numeric array tokens `InvalidArrayIndex`, and
    /// scalar targets `TypeMismatch`.
    pub fn remove(&self, value: &mut Value) -> Result<Value, Diagnostic> {
        if self.tokens.is_empty() {
            return Err(Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::InvalidPointerSyntax,
                "I can't remove the root value".to_string(),
            ));
        }
        let last_token = &self.tokens[self.tokens.len() - 1];
        let parent = self.descend_mut(value)?;
        match parent {
            Value::Object(obj) => obj.remove(last_token).ok_or_else(|| {
                Diagnostic::new(
                    DiagnosticLevel::Fatal,
                    DiagnosticCode::PathNotFound,
                    format!("I couldn't find the key '{}' to remove", last_token),
                )
            }),
            Value::Array(arr) => {
                let index = last_token.parse::<usize>().map_err(|_| {
                    Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::InvalidArrayIndex,
                        format!("I couldn't parse '{}' as an array index", last_token),
                    )
                })?;
                if index < arr.len() {
                    Ok(arr.remove(index))
                } else {
                    Err(Diagnostic::new(
                        DiagnosticLevel::Fatal,
                        DiagnosticCode::PathNotFound,
                        format!(
                            "I couldn't remove index {} (array length is {})",
                            index,
                            arr.len()
                        ),
                    ))
                }
            }
            _ => Err(Diagnostic::new(
                DiagnosticLevel::Fatal,
                DiagnosticCode::TypeMismatch,
                format!(
                    "I can't remove property '{}' from {}",
                    last_token,
                    parent.type_name()
                ),
            )),
        }
    }

    /// Renders the pointer back to RFC 6901 text, re-escaping "~" as "~0"
    /// before "/" as "~1" (that order prevents double-escaping).
    pub fn to_string(&self) -> String {
        if self.tokens.is_empty() {
            return "".to_string();
        }
        let escaped_tokens: Vec<String> = self
            .tokens
            .iter()
            .map(|token| token.replace("~", "~0").replace("/", "~1"))
            .collect();
        format!("/{}", escaped_tokens.join("/"))
    }
}
/// Human-readable JSON type names for use in diagnostic messages.
trait ValueTypeExt {
    fn type_name(&self) -> &'static str;
}
impl ValueTypeExt for Value {
    /// Maps each JSON value variant to a lowercase noun for error text.
    fn type_name(&self) -> &'static str {
        match self {
            Value::Null => "null",
            Value::Bool(_) => "boolean",
            Value::Number(_) => "number",
            Value::String(_) => "string",
            Value::Array(_) => "array",
            Value::Object(_) => "object",
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // The empty pointer refers to the whole document.
    #[test]
    fn test_empty_pointer() {
        let pointer = JsonPointer::new("").unwrap();
        let value = json!({"foo": "bar"});
        assert_eq!(pointer.get(&value).unwrap(), &value);
    }

    #[test]
    fn test_simple_object_access() {
        let pointer = JsonPointer::new("/foo").unwrap();
        let value = json!({"foo": "bar"});
        assert_eq!(pointer.get(&value).unwrap(), &json!("bar"));
    }

    #[test]
    fn test_nested_object_access() {
        let pointer = JsonPointer::new("/foo/bar").unwrap();
        let value = json!({"foo": {"bar": "baz"}});
        assert_eq!(pointer.get(&value).unwrap(), &json!("baz"));
    }

    #[test]
    fn test_array_access() {
        let pointer = JsonPointer::new("/items/0").unwrap();
        let value = json!({"items": ["first", "second"]});
        assert_eq!(pointer.get(&value).unwrap(), &json!("first"));
    }

    // RFC 6901 escapes: "~1" decodes to "/" and "~0" decodes to "~".
    #[test]
    fn test_escape_sequences() {
        let pointer = JsonPointer::new("/foo~1bar").unwrap();
        let value = json!({"foo/bar": "baz"});
        assert_eq!(pointer.get(&value).unwrap(), &json!("baz"));
        let pointer = JsonPointer::new("/foo~0bar").unwrap();
        let value = json!({"foo~bar": "baz"});
        assert_eq!(pointer.get(&value).unwrap(), &json!("baz"));
    }

    #[test]
    fn test_set_object() {
        let pointer = JsonPointer::new("/foo").unwrap();
        let mut value = json!({"foo": "bar"});
        pointer.set(&mut value, json!("new_value")).unwrap();
        assert_eq!(value, json!({"foo": "new_value"}));
    }

    // Setting at index == len appends rather than erroring.
    #[test]
    fn test_set_array_append() {
        let pointer = JsonPointer::new("/items/2").unwrap();
        let mut value = json!({"items": ["first", "second"]});
        pointer.set(&mut value, json!("third")).unwrap();
        assert_eq!(value, json!({"items": ["first", "second", "third"]}));
    }

    // remove() returns the removed value as well as mutating in place.
    #[test]
    fn test_remove_object() {
        let pointer = JsonPointer::new("/foo").unwrap();
        let mut value = json!({"foo": "bar", "baz": "qux"});
        let removed = pointer.remove(&mut value).unwrap();
        assert_eq!(removed, json!("bar"));
        assert_eq!(value, json!({"baz": "qux"}));
    }

    #[test]
    fn test_remove_array() {
        let pointer = JsonPointer::new("/items/0").unwrap();
        let mut value = json!({"items": ["first", "second", "third"]});
        let removed = pointer.remove(&mut value).unwrap();
        assert_eq!(removed, json!("first"));
        assert_eq!(value, json!({"items": ["second", "third"]}));
    }
}

1005
src/reader.rs Normal file

File diff suppressed because it is too large Load diff