// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
use crate::flags;
use chrono::{DateTime, Utc};
use json_archive::archive_open::open_archive;
use json_archive::detection::CompressionFormat;
use json_archive::{read_events, Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
use serde::Serialize;
use std::path::Path;
#[derive(Debug)]
struct ObservationInfo {
id: String,
timestamp: DateTime,
created: DateTime, // For initial state, this is the archive creation time
change_count: usize,
json_size: usize,
}
/// One row of the observation list emitted in `--output json` mode.
#[derive(Serialize)]
struct JsonObservation {
    /// Zero-based position of the observation within the archive.
    index: usize,
    /// Observation id ("initial" for index 0).
    id: String,
    /// RFC 3339 formatted timestamp string.
    timestamp: String,
    /// Number of change events in this observation.
    changes: usize,
    /// Serialized JSON size in bytes after this observation was applied.
    json_size: usize,
}
#[derive(Serialize)]
struct JsonInfoOutput {
archive: String,
compression: String,
created: String,
file_size: u64,
snapshot_count: usize,
observations: Vec,
total_json_size: u64,
efficiency_percent: f64,
}
pub fn run(flags: &flags::Info) -> Result<(), Vec> {
if !flags.file.exists() {
return Err(vec![Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::PathNotFound,
format!("I couldn't find the archive file: {}", flags.file.display()),
)
.with_advice(
"Make sure the file path is correct and the file exists. \
Check for typos in the filename."
.to_string(),
)]);
}
let (observations, snapshot_count, compression_format) = match collect_observations(&flags.file)
{
Ok((obs, count, format)) => (obs, count, format),
Err(diagnostics) => return Err(diagnostics),
};
let file_size = match std::fs::metadata(&flags.file) {
Ok(metadata) => metadata.len(),
Err(_) => 0,
};
// Calculate total JSON size (sum of all observations + newline separators)
let total_json_size: u64 = observations
.iter()
.map(|obs| obs.json_size as u64)
.sum::()
+ (observations.len() as u64).saturating_sub(1); // Add newlines between observations
let efficiency_percent = if total_json_size > 0 {
(file_size as f64 / total_json_size as f64) * 100.0
} else {
0.0
};
// Check output format
let is_json_output = flags.output.as_ref().map(|s| s == "json").unwrap_or(false);
if is_json_output {
// JSON output mode
if observations.is_empty() {
let empty_output = JsonInfoOutput {
archive: flags.file.display().to_string(),
compression: compression_format.to_string(),
created: "".to_string(),
file_size,
snapshot_count,
observations: Vec::new(),
total_json_size: 0,
efficiency_percent: 0.0,
};
println!(
"{}",
serde_json::to_string_pretty(&empty_output).unwrap_or_default()
);
return Ok(());
}
let json_observations: Vec = observations
.iter()
.enumerate()
.map(|(index, obs)| JsonObservation {
index,
id: if index == 0 {
"initial".to_string()
} else {
obs.id.clone()
},
timestamp: obs.timestamp.to_rfc3339(),
changes: obs.change_count,
json_size: obs.json_size,
})
.collect();
let json_output = JsonInfoOutput {
archive: flags.file.display().to_string(),
compression: compression_format.to_string(),
created: observations[0].created.to_rfc3339(),
file_size,
snapshot_count,
observations: json_observations,
total_json_size,
efficiency_percent,
};
println!(
"{}",
serde_json::to_string_pretty(&json_output).unwrap_or_default()
);
} else {
// Human-readable output mode
println!("Archive: {}", flags.file.display());
println!("Compression: {}", compression_format);
if observations.is_empty() {
println!("No observations found");
return Ok(());
}
let first_timestamp = &observations[0].created;
let last_timestamp = if observations.len() > 1 {
&observations.last().unwrap().timestamp
} else {
first_timestamp
};
println!("Created: {}", format_timestamp(first_timestamp));
println!();
if observations.len() == 1 {
println!("1 observation on {}", format_timestamp(first_timestamp));
} else {
println!(
"{} observations from {} to {}",
observations.len(),
format_timestamp(first_timestamp),
format_timestamp(last_timestamp)
);
}
println!();
// Table header
println!(" # Observation ID Date & Time Changes JSON Size");
println!("────────────────────────────────────────────────────────────────────────────────────────");
for (index, obs) in observations.iter().enumerate() {
let id_display = if index == 0 {
"(initial)".to_string()
} else {
truncate_id(&obs.id)
};
let changes_display = if index == 0 {
"-".to_string()
} else {
obs.change_count.to_string()
};
println!(
" {:2} {:32} {:25} {:7} {:9}",
index,
id_display,
format_timestamp(&obs.timestamp),
changes_display,
format_size(obs.json_size as u64)
);
}
println!();
let snapshot_text = if snapshot_count == 0 {
"0 snapshots".to_string()
} else {
format!("{} snapshots", snapshot_count)
};
let comparison = if efficiency_percent < 100.0 {
format!("{:.1}% smaller", 100.0 - efficiency_percent)
} else {
format!("{:.1}% larger", efficiency_percent - 100.0)
};
println!(
"Archive size: {} ({}, {} than JSON Lines)",
format_size(file_size),
snapshot_text,
comparison
);
println!("Data size: {}", format_size(total_json_size));
// Add usage instructions
println!();
println!("To get the JSON value at a specific observation:");
println!(" json-archive state --index <#> ");
println!(" json-archive state --id ");
}
Ok(())
}
fn collect_observations(
file_path: &Path,
) -> Result<(Vec, usize, CompressionFormat), Vec> {
let opened = open_archive(file_path)?;
let compression_format = opened.format;
let (initial_state, mut event_iter) =
read_events(opened.reader, &file_path.display().to_string())?;
// Check for fatal diagnostics from initial parsing
if event_iter.diagnostics.has_fatal() {
return Err(event_iter.diagnostics.diagnostics().to_vec());
}
let mut observations = Vec::new();
let mut current_state = initial_state.clone();
let mut snapshot_count = 0;
let initial_size = serde_json::to_string(&initial_state)
.unwrap_or_default()
.len();
let created = event_iter.header.created;
// Add initial state as observation 0
observations.push(ObservationInfo {
id: "initial".to_string(),
timestamp: created,
created,
change_count: 0,
json_size: initial_size,
});
// Iterate through events
while let Some(event) = event_iter.next() {
match event {
Event::Observe {
observation_id,
timestamp,
change_count,
} => {
observations.push(ObservationInfo {
id: observation_id,
timestamp,
created,
change_count,
json_size: 0, // Will be calculated after applying events
});
}
Event::Add { path, value, .. } => {
let _ = json_archive::apply_add(&mut current_state, &path, value);
// Update the JSON size of the last observation
if let Some(last_obs) = observations.last_mut() {
if last_obs.id != "initial" {
last_obs.json_size = serde_json::to_string(¤t_state)
.unwrap_or_default()
.len();
}
}
}
Event::Change {
path, new_value, ..
} => {
let _ = json_archive::apply_change(&mut current_state, &path, new_value);
// Update the JSON size of the last observation
if let Some(last_obs) = observations.last_mut() {
if last_obs.id != "initial" {
last_obs.json_size = serde_json::to_string(¤t_state)
.unwrap_or_default()
.len();
}
}
}
Event::Remove { path, .. } => {
let _ = json_archive::apply_remove(&mut current_state, &path);
// Update the JSON size of the last observation
if let Some(last_obs) = observations.last_mut() {
if last_obs.id != "initial" {
last_obs.json_size = serde_json::to_string(¤t_state)
.unwrap_or_default()
.len();
}
}
}
Event::Move { path, moves, .. } => {
let _ = json_archive::apply_move(&mut current_state, &path, moves);
// Update the JSON size of the last observation
if let Some(last_obs) = observations.last_mut() {
if last_obs.id != "initial" {
last_obs.json_size = serde_json::to_string(¤t_state)
.unwrap_or_default()
.len();
}
}
}
Event::Snapshot { object, .. } => {
current_state = object;
snapshot_count += 1;
// Update the JSON size of the last observation
if let Some(last_obs) = observations.last_mut() {
if last_obs.id != "initial" {
last_obs.json_size = serde_json::to_string(¤t_state)
.unwrap_or_default()
.len();
}
}
}
}
}
Ok((observations, snapshot_count, compression_format))
}
fn format_timestamp(dt: &DateTime) -> String {
dt.format("%a %H:%M:%S %d-%b-%Y").to_string()
}
/// Truncates an observation id to at most 20 characters for table display,
/// appending "..." when shortened.
///
/// Counts characters rather than bytes: the original `&id[..20]` byte slice
/// would panic if byte 20 fell inside a multi-byte UTF-8 sequence.
fn truncate_id(id: &str) -> String {
    if id.chars().count() > 20 {
        let head: String = id.chars().take(20).collect();
        format!("{}...", head)
    } else {
        id.to_string()
    }
}
/// Renders a byte count as a human-readable size: plain bytes below 1 KB,
/// then one-decimal KB or MB (1 KB = 1024 bytes).
fn format_size(bytes: u64) -> String {
    const KB: u64 = 1024;
    const MB: u64 = KB * 1024;
    match bytes {
        b if b < KB => format!("{} bytes", b),
        b if b < MB => format!("{:.1} KB", b as f64 / KB as f64),
        b => format!("{:.1} MB", b as f64 / MB as f64),
    }
}