refactor: decompose archive read/write into composable building blocks

Delete archive_context.rs and archive_ops.rs (1200+ lines of duplicated
logic). Replace with four focused modules:

1. open_archive() - opens a file, detects compression, returns raw bytes
2. read_archive() - parses bytes into validated observations
3. CompressionWriter - writes bytes with any compression format
4. WriteStrategy - given a list of files, determines input archive,
 output archive, output format, and which of four write modes to use:
  - Create: new archive, no input
  - Append: uncompressed input, seek to end
  - AtomicSwap: compressed input, rewrite via temp file
  - CopyOnWrite: different input/output paths, transcode between formats

Previously you could not specify an output format: appending always
preserved the input format, and creating compressed archives didn't work.
Now all four cases work with any supported compression format.

Atomic swap now writes to a temp file and then renames it over the
original, so a crash mid-write can no longer leave a corrupt archive.

Trade-off: This approach prioritizes code clarity over syscall efficiency.
  The archive file may be opened and read multiple times during a single
  operation (once for format detection, once for reading state, once for
  copying content). A more optimized implementation could reuse file
  handles, but the current approach makes each step's purpose obvious.
This commit is contained in:
nobody 2025-12-01 20:51:50 -08:00
commit b65103c9f7
Signed by: GrocerPublishAgent
GPG key ID: 43B1C298CDDE181C
28 changed files with 2055 additions and 2859 deletions

View file

@ -21,7 +21,9 @@
use crate::flags;
use chrono::{DateTime, Utc};
use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
use json_archive::archive_open::open_archive;
use json_archive::detection::CompressionFormat;
use json_archive::{read_events, Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
use serde::Serialize;
use std::path::Path;
@ -46,6 +48,7 @@ struct JsonObservation {
#[derive(Serialize)]
struct JsonInfoOutput {
archive: String,
compression: String,
created: String,
file_size: u64,
snapshot_count: usize,
@ -54,9 +57,9 @@ struct JsonInfoOutput {
efficiency_percent: f64,
}
pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
pub fn run(flags: &flags::Info) -> Result<(), Vec<Diagnostic>> {
if !flags.file.exists() {
return vec![Diagnostic::new(
return Err(vec![Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::PathNotFound,
format!("I couldn't find the archive file: {}", flags.file.display()),
@ -65,12 +68,13 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
"Make sure the file path is correct and the file exists. \
Check for typos in the filename."
.to_string(),
)];
)]);
}
let (observations, snapshot_count) = match collect_observations(&flags.file) {
Ok((obs, count)) => (obs, count),
Err(diagnostics) => return diagnostics,
let (observations, snapshot_count, compression_format) = match collect_observations(&flags.file)
{
Ok((obs, count, format)) => (obs, count, format),
Err(diagnostics) => return Err(diagnostics),
};
let file_size = match std::fs::metadata(&flags.file) {
@ -79,7 +83,10 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
};
// Calculate total JSON size (sum of all observations + newline separators)
let total_json_size: u64 = observations.iter().map(|obs| obs.json_size as u64).sum::<u64>()
let total_json_size: u64 = observations
.iter()
.map(|obs| obs.json_size as u64)
.sum::<u64>()
+ (observations.len() as u64).saturating_sub(1); // Add newlines between observations
let efficiency_percent = if total_json_size > 0 {
@ -96,6 +103,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
if observations.is_empty() {
let empty_output = JsonInfoOutput {
archive: flags.file.display().to_string(),
compression: compression_format.to_string(),
created: "".to_string(),
file_size,
snapshot_count,
@ -107,7 +115,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
"{}",
serde_json::to_string_pretty(&empty_output).unwrap_or_default()
);
return Vec::new();
return Ok(());
}
let json_observations: Vec<JsonObservation> = observations
@ -128,6 +136,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
let json_output = JsonInfoOutput {
archive: flags.file.display().to_string(),
compression: compression_format.to_string(),
created: observations[0].created.to_rfc3339(),
file_size,
snapshot_count,
@ -143,10 +152,11 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
} else {
// Human-readable output mode
println!("Archive: {}", flags.file.display());
println!("Compression: {}", compression_format);
if observations.is_empty() {
println!("No observations found");
return Vec::new();
return Ok(());
}
let first_timestamp = &observations[0].created;
@ -217,56 +227,26 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
snapshot_text,
comparison
);
println!(
"Data size: {}",
format_size(total_json_size)
);
println!("Data size: {}", format_size(total_json_size));
// Add usage instructions
println!();
println!("To get the JSON value at a specific observation:");
println!(" json-archive state --index <#> {}", flags.file.display());
println!(
" json-archive state --id <observation-id> {}",
flags.file.display()
);
println!();
println!("Examples:");
println!(
" json-archive state --index 0 {} # Get initial state",
flags.file.display()
);
println!(
" json-archive state --index 2 {} # Get state after observation 2",
flags.file.display()
);
println!(" json-archive state --index <#> <archive>");
println!(" json-archive state --id <observation-id> <archive>");
}
Vec::new()
Ok(())
}
fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
let reader = match json_archive::ArchiveReader::new(file_path, json_archive::ReadMode::AppendSeek) {
Ok(r) => r,
Err(e) => {
return Err(vec![Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::PathNotFound,
format!("I couldn't open the archive file: {}", e),
)]);
}
};
fn collect_observations(
file_path: &Path,
) -> Result<(Vec<ObservationInfo>, usize, CompressionFormat), Vec<Diagnostic>> {
let opened = open_archive(file_path)?;
let compression_format = opened.format;
let (initial_state, mut event_iter) = match reader.events(file_path) {
Ok(r) => r,
Err(e) => {
return Err(vec![Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::PathNotFound,
format!("I couldn't read the archive file: {}", e),
)]);
}
};
let (initial_state, mut event_iter) =
read_events(opened.reader, &file_path.display().to_string())?;
// Check for fatal diagnostics from initial parsing
if event_iter.diagnostics.has_fatal() {
@ -295,7 +275,11 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
// Iterate through events
while let Some(event) = event_iter.next() {
match event {
Event::Observe { observation_id, timestamp, change_count } => {
Event::Observe {
observation_id,
timestamp,
change_count,
} => {
observations.push(ObservationInfo {
id: observation_id,
timestamp,
@ -316,7 +300,9 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
}
}
}
Event::Change { path, new_value, .. } => {
Event::Change {
path, new_value, ..
} => {
let _ = json_archive::apply_change(&mut current_state, &path, new_value);
// Update the JSON size of the last observation
@ -368,10 +354,9 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
}
}
Ok((observations, snapshot_count))
Ok((observations, snapshot_count, compression_format))
}
fn format_timestamp(dt: &DateTime<Utc>) -> String {
dt.format("%a %H:%M:%S %d-%b-%Y").to_string()
}