feat: add support for reading compressed archive files

nobody 2025-09-30 11:19:13 -07:00
commit 1f0f41a96c
Signed by: GrocerPublishAgent
GPG key ID: 43B1C298CDDE181C
8 changed files with 552 additions and 272 deletions

Cargo.lock (generated)

@@ -2,6 +2,27 @@
 # It is not intended for manual editing.
 version = 4
 
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"
@@ -32,6 +53,27 @@ version = "2.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
 
+[[package]]
+name = "brotli"
+version = "8.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.19.0"
@@ -45,6 +87,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f"
 dependencies = [
  "find-msvc-tools",
+ "jobserver",
+ "libc",
  "shlex",
 ]
 
@@ -74,6 +118,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "derive_arbitrary"
 version = "1.4.2"
@@ -107,6 +160,16 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
 
+[[package]]
+name = "flate2"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.3.3"
@@ -149,6 +212,16 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
 
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom",
+ "libc",
+]
+
 [[package]]
 name = "js-sys"
 version = "0.3.81"
@@ -164,12 +237,15 @@ name = "json-archive"
 version = "0.99.0"
 dependencies = [
  "arbitrary",
+ "brotli",
  "chrono",
+ "flate2",
  "serde",
  "serde_json",
  "tempfile",
  "uuid",
  "xflags",
+ "zstd",
 ]
 
 [[package]]
@@ -196,6 +272,15 @@ version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.19"
@@ -211,6 +296,12 @@ version = "1.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.101"
@@ -516,3 +607,31 @@ name = "xflags-macros"
 version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155"
+
+[[package]]
+name = "zstd"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.16+zstd.1.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
+dependencies = [
+ "cc",
+ "pkg-config",
+]

Cargo.toml

@@ -3,6 +3,10 @@ name = "json-archive"
 version = "0.99.0"
 edition = "2021"
 
+[features]
+default = ["compression"]
+compression = ["flate2", "brotli", "zstd"]
+
 [dependencies]
 xflags = "0.3"
 serde = { version = "1.0", features = ["derive"] }
@@ -10,6 +14,11 @@ serde_json = "1.0"
 chrono = { version = "0.4", features = ["serde"] }
 uuid = { version = "1.0", features = ["v4", "serde"] }
 
+# Compression support (optional, enabled by default)
+flate2 = { version = "1.0", optional = true }
+brotli = { version = "8.0", optional = true }
+zstd = { version = "0.13", optional = true }
+
 [dev-dependencies]
 tempfile = "3.0"
 arbitrary = { version = "1.0", features = ["derive"] }
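
The `[features]` table above is ordinary Cargo feature wiring, so both configurations can be exercised with standard Cargo flags alone. A quick local sanity check might look like this (nothing here is project-specific beyond the `compression` feature name):

```bash
# Default build: the compression feature pulls in flate2, brotli, and zstd
cargo build --release

# Minimal build: the optional compression crates are never compiled
cargo build --release --no-default-features

# Opt back in explicitly from a minimal baseline
cargo build --release --no-default-features --features compression
```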

README.md

@@ -57,12 +57,20 @@ json-archive videoID.info.json
 ### Compression support (as a concession)
 
-While the core design keeps things simple and readable, the tool does work with compressed archives as a practical concession for those who need it. You can read from and write to gzip, brotli, and zlib compressed files without special flags.
+While the core design keeps things simple and readable, the tool does work with compressed archives as a practical concession for those who need it. You can read from and write to gzip, deflate, zlib, brotli, and zstd compressed files without special flags.
 
 **Important caveat**: Compressed archives may require rewriting the entire file during updates (depending on the compression format). If your temporary filesystem is full or too small, updates can fail. In that case, manually specify an output destination with `-o` to write the new archive elsewhere.
 
 This works fine for the happy path with archive files up to a few hundred megabytes, but contradicts the "keep it simple" design philosophy - it's included because it's practically useful.
 
+**Building without compression**: Compression libraries are a security vulnerability vector. The default build includes them because most users want convenience. If you don't want to bundle compression libraries:
+
+```bash
+cargo install json-archive --no-default-features
+```
+
+The minimal build detects compressed files and errors with a clear message explaining you need the full version or manual decompression.
+
 ## Archive format
 
 The format is JSONL with delta-based changes using [JSON Pointer](https://tools.ietf.org/html/rfc6901) paths. For complete technical details about the file format, see the [file format specification](docs/file-format-spec.md).
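
Since detection is by file content rather than flags, the round trip is worth sketching. A plausible session follows; `gzip` and the `-o` flag are from the text above, while the `info` subcommand is an assumption inferred from the `flags::Info` handler elsewhere in this commit, since the full CLI surface isn't shown in this diff:

```bash
# Compress an existing archive; the reader identifies the format from the
# file's magic bytes, not its extension
gzip videoID.info.json          # produces videoID.info.json.gz

# Reads transparently; no decompression flag required
json-archive info videoID.info.json.gz

# Hypothetical update run: if the temporary filesystem is too small for the
# full rewrite, redirect the output with -o as the caveat above suggests
json-archive videoID.info.json.gz -o /data/videoID.info.json.gz
```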

Binary file not shown.

Binary file not shown.


@@ -21,11 +21,8 @@
 use crate::flags;
 use chrono::{DateTime, Utc};
-use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel};
+use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
 use serde::Serialize;
-use serde_json::Value;
-use std::fs::File;
-use std::io::{BufRead, BufReader};
 use std::path::Path;
 
 #[derive(Debug)]
@@ -53,6 +50,8 @@ struct JsonInfoOutput {
     file_size: u64,
     snapshot_count: usize,
     observations: Vec<JsonObservation>,
+    total_json_size: u64,
+    efficiency_percent: f64,
 }
 
 pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
@@ -69,8 +68,8 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
         )];
     }
 
-    let observations = match collect_observations(&flags.file) {
-        Ok(obs) => obs,
+    let (observations, snapshot_count) = match collect_observations(&flags.file) {
+        Ok((obs, count)) => (obs, count),
         Err(diagnostics) => return diagnostics,
     };
@@ -79,7 +78,15 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
         Err(_) => 0,
     };
 
-    let snapshot_count = count_snapshots(&flags.file).unwrap_or(0);
+    // Calculate total JSON size (sum of all observations + newline separators)
+    let total_json_size: u64 = observations.iter().map(|obs| obs.json_size as u64).sum::<u64>()
+        + (observations.len() as u64).saturating_sub(1); // Add newlines between observations
+
+    let efficiency_percent = if total_json_size > 0 {
+        (file_size as f64 / total_json_size as f64) * 100.0
+    } else {
+        0.0
+    };
 
     // Check output format
     let is_json_output = flags.output.as_ref().map(|s| s == "json").unwrap_or(false);
@@ -93,6 +100,8 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             file_size,
             snapshot_count,
             observations: Vec::new(),
+            total_json_size: 0,
+            efficiency_percent: 0.0,
         };
         println!(
             "{}",
@@ -123,6 +132,8 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             file_size,
             snapshot_count,
             observations: json_observations,
+            total_json_size,
+            efficiency_percent,
         };
 
         println!(
@@ -193,10 +204,22 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     } else {
         format!("{} snapshots", snapshot_count)
     };
 
+    let comparison = if efficiency_percent < 100.0 {
+        format!("{:.1}% smaller", 100.0 - efficiency_percent)
+    } else {
+        format!("{:.1}% larger", efficiency_percent - 100.0)
+    };
+
     println!(
-        "Total archive size: {} ({})",
+        "Archive size: {} ({}, {} than JSON Lines)",
         format_size(file_size),
-        snapshot_text
+        snapshot_text,
+        comparison
     );
+    println!(
+        "Data size: {}",
+        format_size(total_json_size)
+    );
 
     // Add usage instructions
@@ -222,9 +245,9 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     Vec::new()
 }
 
-fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
-    let file = match File::open(file_path) {
-        Ok(f) => f,
+fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
+    let reader = match json_archive::ArchiveReader::new(file_path, json_archive::ReadMode::AppendSeek) {
+        Ok(r) => r,
         Err(e) => {
             return Err(vec![Diagnostic::new(
                 DiagnosticLevel::Fatal,
@@ -234,44 +257,32 @@ fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
         }
     };
 
-    let reader = BufReader::new(file);
-    let mut lines = reader.lines();
-    let mut observations = Vec::new();
-
-    // Parse header
-    let header_line = match lines.next() {
-        Some(Ok(line)) => line,
-        _ => {
-            return Err(vec![Diagnostic::new(
-                DiagnosticLevel::Fatal,
-                DiagnosticCode::EmptyFile,
-                "Archive file is empty or unreadable".to_string(),
-            )]);
-        }
-    };
-
-    let header: Value = match serde_json::from_str(&header_line) {
-        Ok(h) => h,
+    let (initial_state, mut event_iter) = match reader.events(file_path) {
+        Ok(r) => r,
         Err(e) => {
             return Err(vec![Diagnostic::new(
                 DiagnosticLevel::Fatal,
-                DiagnosticCode::MissingHeader,
-                format!("I couldn't parse the header: {}", e),
+                DiagnosticCode::PathNotFound,
+                format!("I couldn't read the archive file: {}", e),
             )]);
         }
     };
 
-    let created_str = header["created"].as_str().unwrap_or("");
-    let created: DateTime<Utc> = match created_str.parse() {
-        Ok(dt) => dt,
-        Err(_) => Utc::now(),
-    };
+    // Check for fatal diagnostics from initial parsing
+    if event_iter.diagnostics.has_fatal() {
+        return Err(event_iter.diagnostics.diagnostics().to_vec());
+    }
 
-    let initial_state = header["initial"].clone();
+    let mut observations = Vec::new();
+    let mut current_state = initial_state.clone();
+    let mut snapshot_count = 0;
+
     let initial_size = serde_json::to_string(&initial_state)
         .unwrap_or_default()
         .len();
+    let created = event_iter.header.created;
 
     // Add initial state as observation 0
     observations.push(ObservationInfo {
         id: "initial".to_string(),
@@ -281,54 +292,73 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
         json_size: initial_size,
     });
 
-    let mut current_state = initial_state;
-
-    // Parse events
-    for line in lines {
-        let line = match line {
-            Ok(l) => l,
-            Err(_) => continue,
-        };
-
-        if line.trim().starts_with('#') || line.trim().is_empty() {
-            continue;
-        }
-
-        let event: Value = match serde_json::from_str(&line) {
-            Ok(e) => e,
-            Err(_) => continue,
-        };
-
-        if let Some(arr) = event.as_array() {
-            if arr.is_empty() {
-                continue;
-            }
-
-            let event_type = arr[0].as_str().unwrap_or("");
-
-            if event_type == "observe" && arr.len() >= 4 {
-                let obs_id = arr[1].as_str().unwrap_or("").to_string();
-                let timestamp_str = arr[2].as_str().unwrap_or("");
-                let change_count = arr[3].as_u64().unwrap_or(0) as usize;
-
-                let timestamp: DateTime<Utc> = match timestamp_str.parse() {
-                    Ok(dt) => dt,
-                    Err(_) => continue,
-                };
-
+    // Iterate through events
+    while let Some(event) = event_iter.next() {
+        match event {
+            Event::Observe { observation_id, timestamp, change_count } => {
                 observations.push(ObservationInfo {
-                    id: obs_id,
+                    id: observation_id,
                     timestamp,
                     created,
                     change_count,
                     json_size: 0, // Will be calculated after applying events
                 });
-            } else {
-                // Apply the event to current_state for size calculation
-                apply_event_to_state(&mut current_state, &arr);
-                // Update the JSON size of the last observation
-                if let Some(last_obs) = observations.last_mut() {
+            }
+            Event::Add { path, value, .. } => {
+                let _ = json_archive::apply_add(&mut current_state, &path, value);
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Change { path, new_value, .. } => {
+                let _ = json_archive::apply_change(&mut current_state, &path, new_value);
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Remove { path, .. } => {
+                let _ = json_archive::apply_remove(&mut current_state, &path);
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Move { path, moves, .. } => {
+                let _ = json_archive::apply_move(&mut current_state, &path, moves);
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Snapshot { object, .. } => {
+                current_state = object;
+                snapshot_count += 1;
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
                         last_obs.json_size = serde_json::to_string(&current_state)
                             .unwrap_or_default()
                             .len();
@@ -336,41 +366,11 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
                     }
                 }
             }
-        }
-    }
-
-    Ok(observations)
-}
-
-fn apply_event_to_state(state: &mut Value, event: &[Value]) {
-    if event.is_empty() {
-        return;
-    }
-
-    let event_type = event[0].as_str().unwrap_or("");
-
-    match event_type {
-        "add" if event.len() >= 3 => {
-            let path = event[1].as_str().unwrap_or("");
-            let value = event[2].clone();
-            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
-                let _ = pointer.set(state, value);
-            }
-        }
-        "change" if event.len() >= 3 => {
-            let path = event[1].as_str().unwrap_or("");
-            let value = event[2].clone();
-            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
-                let _ = pointer.set(state, value);
-            }
-        }
-        "remove" if event.len() >= 2 => {
-            let path = event[1].as_str().unwrap_or("");
-            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
-                let _ = pointer.remove(state);
-            }
-        }
-        _ => {}
-    }
-}
+        }
+    }
+
+    Ok((observations, snapshot_count))
+}
 
 fn format_timestamp(dt: &DateTime<Utc>) -> String {
     dt.format("%a %H:%M:%S %d-%b-%Y").to_string()
@@ -393,18 +393,3 @@ fn format_size(bytes: u64) -> String {
         format!("{:.1} MB", bytes as f64 / (1024.0 * 1024.0))
     }
 }
-
-fn count_snapshots(file_path: &Path) -> Result<usize, std::io::Error> {
-    let file = File::open(file_path)?;
-    let reader = BufReader::new(file);
-    let mut count = 0;
-
-    for line in reader.lines() {
-        let line = line?;
-        if line.trim().starts_with('[') && line.contains("\"snapshot\"") {
-            count += 1;
-        }
-    }
-
-    Ok(count)
-}
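
The rewritten `collect_observations` above is the first consumer of the streaming API this commit introduces. Reduced to its skeleton, the consumption pattern looks like the sketch below. It is assembled from the signatures visible in this diff (`ArchiveReader::new`, `events`, the `Event` variants, and the newly re-exported `apply_*` helpers), not a verbatim excerpt from the codebase:

```rust
use json_archive::{ArchiveReader, Event, ReadMode};
use serde_json::Value;
use std::path::Path;

fn replay(path: &Path) -> std::io::Result<Value> {
    let reader = ArchiveReader::new(path, ReadMode::AppendSeek)?;
    // events() hands back the header's initial state plus a streaming iterator;
    // any decompression already happened behind the BufRead it wraps.
    let (mut state, mut events) = reader.events(path)?;

    while let Some(event) = events.next() {
        match event {
            Event::Add { path, value, .. } => {
                let _ = json_archive::apply_add(&mut state, &path, value);
            }
            Event::Change { path, new_value, .. } => {
                let _ = json_archive::apply_change(&mut state, &path, new_value);
            }
            Event::Remove { path, .. } => {
                let _ = json_archive::apply_remove(&mut state, &path);
            }
            Event::Move { path, moves, .. } => {
                let _ = json_archive::apply_move(&mut state, &path, moves);
            }
            Event::Snapshot { object, .. } => state = object,
            Event::Observe { .. } => {} // bookkeeping only; no state change
        }
    }

    // Diagnostics accumulate on the iterator instead of aborting the stream;
    // callers decide what counts as fatal.
    if events.diagnostics.has_fatal() {
        eprintln!("archive contained fatal diagnostics");
    }

    Ok(state)
}
```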

src/lib.rs

@@ -36,4 +36,4 @@ pub use detection::is_json_archive;
 pub use diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
 pub use events::{Event, Header, Observation};
 pub use pointer::JsonPointer;
-pub use reader::{ArchiveReader, ReadMode, ReadResult};
+pub use reader::{apply_add, apply_change, apply_move, apply_remove, ArchiveReader, ReadMode, ReadResult};

src/reader.rs

@@ -22,7 +22,7 @@
 use serde_json::Value;
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::{BufRead, BufReader};
+use std::io::{BufRead, BufReader, Read};
 use std::path::Path;
 
 use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
@@ -30,12 +30,29 @@ use crate::event_deserialize::EventDeserializer;
 use crate::events::{Event, Header};
 use crate::pointer::JsonPointer;
 
+#[cfg(feature = "compression")]
+use flate2::read::{DeflateDecoder, GzDecoder, ZlibDecoder};
+#[cfg(feature = "compression")]
+use brotli::Decompressor;
+#[cfg(feature = "compression")]
+use zstd::stream::read::Decoder as ZstdDecoder;
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ReadMode {
     FullValidation,
     AppendSeek,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum CompressionFormat {
+    Gzip,
+    Deflate,
+    Zlib,
+    Brotli,
+    Zstd,
+    None,
+}
+
 pub struct ArchiveReader {
     mode: ReadMode,
     filename: String,
@@ -49,134 +66,46 @@ pub struct ReadResult {
     pub observation_count: usize,
 }
 
-impl ArchiveReader {
-    pub fn new<P: AsRef<Path>>(path: P, mode: ReadMode) -> std::io::Result<Self> {
-        let filename = path.as_ref().display().to_string();
-        Ok(Self { mode, filename })
-    }
+pub struct EventIterator {
+    reader: Box<dyn BufRead>,
+    pub diagnostics: DiagnosticCollector,
+    pub header: Header,
+    filename: String,
+    line_number: usize,
+}
 
-    pub fn read<P: AsRef<Path>>(&self, path: P) -> std::io::Result<ReadResult> {
-        let file = File::open(path)?;
-        let reader = BufReader::new(file);
-        let mut diagnostics = DiagnosticCollector::new();
-        let mut lines_iter = reader.lines().enumerate();
+impl Iterator for EventIterator {
+    type Item = Event;
 
-        let (header_line_number, header_line) = match lines_iter.next() {
-            Some((idx, Ok(line))) => (idx + 1, line),
-            Some((idx, Err(e))) if e.kind() == std::io::ErrorKind::InvalidData => {
-                let line_number = idx + 1;
-                diagnostics.add(
-                    Diagnostic::new(
-                        DiagnosticLevel::Fatal,
-                        DiagnosticCode::InvalidUtf8,
-                        format!("I found invalid UTF-8 bytes at line {}.", line_number)
-                    )
-                    .with_location(self.filename.clone(), line_number)
-                    .with_advice(
-                        "The JSON Archive format requires UTF-8 encoding. Make sure the file \
-                         was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
-                            .to_string()
-                    )
-                );
-                return Ok(ReadResult {
-                    header: Header::new(Value::Null, None),
-                    final_state: Value::Null,
-                    diagnostics,
-                    observation_count: 0,
-                });
-            }
-            Some((_, Err(e))) => return Err(e),
-            None => {
-                diagnostics.add(
-                    Diagnostic::new(
-                        DiagnosticLevel::Fatal,
-                        DiagnosticCode::EmptyFile,
-                        "I found an empty file, but I need at least a header line.".to_string(),
-                    )
-                    .with_location(self.filename.clone(), 1)
-                    .with_advice(
-                        "A valid JSON Archive file must start with a header object containing:\n\
-                         - type: \"@peoplesgrocers/json-archive\"\n\
-                         - version: 1\n\
-                         - created: an ISO-8601 timestamp\n\
-                         - initial: the initial state of the object"
-                            .to_string(),
-                    ),
-                );
-                return Ok(ReadResult {
-                    header: Header::new(Value::Null, None),
-                    final_state: Value::Null,
-                    diagnostics,
-                    observation_count: 0,
-                });
-            }
-        };
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut line = String::new();
 
-        let header = match self.parse_header(&header_line, header_line_number, &mut diagnostics) {
-            Some(h) => h,
-            None => {
-                return Ok(ReadResult {
-                    header: Header::new(Value::Null, None),
-                    final_state: Value::Null,
-                    diagnostics,
-                    observation_count: 0,
-                });
-            }
-        };
+        loop {
+            line.clear();
+            self.line_number += 1;
 
-        let mut state = header.initial.clone();
-        let mut seen_observations: HashSet<String> = HashSet::new();
-        let mut current_observation: Option<(String, usize, usize)> = None;
-        let mut events_in_observation = 0;
-        let mut observation_count = 0;
-
-        // This manual dispatcher mirrors what serde would expand but stays explicit so we can
-        // attach Elm-style diagnostics with precise spans and guidance for each failure case.
-        for (idx, line_result) in lines_iter {
-            let line_number = idx + 1;
-            let line = match line_result {
-                Ok(line) => line,
-                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
-                    diagnostics.add(
-                        Diagnostic::new(
-                            DiagnosticLevel::Fatal,
-                            DiagnosticCode::InvalidUtf8,
-                            format!("I found invalid UTF-8 bytes at line {}.", line_number)
-                        )
-                        .with_location(self.filename.clone(), line_number)
-                        .with_advice(
-                            "The JSON Archive format requires UTF-8 encoding. Make sure the file \
-                             was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
-                                .to_string()
-                        )
-                    );
-                    return Ok(ReadResult {
-                        header: Header::new(Value::Null, None),
-                        final_state: Value::Null,
-                        diagnostics,
-                        observation_count: 0,
-                    });
-                }
-                Err(e) => return Err(e),
-            };
+            match self.reader.read_line(&mut line) {
+                Ok(0) => return None, // EOF
+                Ok(_) => {
                     let trimmed = line.trim();
 
+                    // Skip comments and blank lines
                     if trimmed.starts_with('#') || trimmed.is_empty() {
                         continue;
                     }
 
+                    // Try to parse as event
                     let event_deserializer = match serde_json::from_str::<EventDeserializer>(&line) {
                         Ok(d) => d,
                         Err(e) => {
-                            diagnostics.add(
+                            self.diagnostics.add(
                                 Diagnostic::new(
                                     DiagnosticLevel::Fatal,
                                     DiagnosticCode::InvalidEventJson,
                                     format!("I couldn't parse this line as JSON: {}", e),
                                 )
-                                .with_location(self.filename.clone(), line_number)
-                                .with_snippet(format!("{} | {}", line_number, line))
+                                .with_location(self.filename.clone(), self.line_number)
+                                .with_snippet(format!("{} | {}", self.line_number, line.trim()))
                                 .with_advice(
                                     "Each line after the header must be either:\n\
                                      - A comment starting with #\n\
@@ -189,31 +118,261 @@ impl ArchiveReader {
                                 }
                             };
 
-                    // Add any diagnostics from deserialization with location info
+                    // Add any diagnostics from deserialization
                     for diagnostic in event_deserializer.diagnostics {
-                        diagnostics.add(
+                        self.diagnostics.add(
                             diagnostic
-                                .with_location(self.filename.clone(), line_number)
-                                .with_snippet(format!("{} | {}", line_number, line))
+                                .with_location(self.filename.clone(), self.line_number)
+                                .with_snippet(format!("{} | {}", self.line_number, line.trim()))
                         );
                     }
 
-                    // Continue processing to collect additional errors before failing.
-                    // Even though this function must now return an error, we continue to help
-                    // the user identify all issues in the file at once rather than one at a time.
-                    let event = match event_deserializer.event {
-                        Some(e) => e,
-                        None => {
-                            assert!(diagnostics.has_fatal(), "Expected a fatal diagnostic when deserialization fails");
-                            continue
-                        },
-                    };
+                    // Return event if we have one
+                    if let Some(event) = event_deserializer.event {
+                        return Some(event);
+                    }
+
+                    // If no event but had diagnostics, continue to next line
+                    continue;
+                }
+                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
+                    self.diagnostics.add(
+                        Diagnostic::new(
+                            DiagnosticLevel::Fatal,
+                            DiagnosticCode::InvalidUtf8,
+                            format!("I found invalid UTF-8 bytes at line {}.", self.line_number)
+                        )
+                        .with_location(self.filename.clone(), self.line_number)
+                        .with_advice(
+                            "The JSON Archive format requires UTF-8 encoding. Make sure the file \
+                             was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
+                                .to_string()
+                        )
+                    );
+                    return None;
+                }
+                Err(_) => return None,
+            }
+        }
+    }
+}
+
+fn detect_compression_format(path: &Path, bytes: &[u8]) -> CompressionFormat {
+    if bytes.len() < 4 {
+        return CompressionFormat::None;
+    }
+
+    // Gzip magic number: 0x1f 0x8b
+    if bytes[0] == 0x1f && bytes[1] == 0x8b {
+        return CompressionFormat::Gzip;
+    }
+
+    // Zlib magic number: 0x78 followed by 0x01, 0x5e, 0x9c, or 0xda
+    if bytes[0] == 0x78 && (bytes[1] == 0x01 || bytes[1] == 0x5e || bytes[1] == 0x9c || bytes[1] == 0xda) {
+        return CompressionFormat::Zlib;
+    }
+
+    // Zstd magic number: 0x28 0xb5 0x2f 0xfd
+    if bytes.len() >= 4 && bytes[0] == 0x28 && bytes[1] == 0xb5 && bytes[2] == 0x2f && bytes[3] == 0xfd {
+        return CompressionFormat::Zstd;
+    }
+
+    // Check file extension for brotli (no reliable magic number) and deflate
+    if let Some(ext) = path.extension() {
+        let ext_str = ext.to_string_lossy();
+        if ext_str == "br" || path.to_string_lossy().contains(".br.") {
+            return CompressionFormat::Brotli;
+        }
+        if ext_str == "deflate" {
+            return CompressionFormat::Deflate;
+        }
+    }
+
+    CompressionFormat::None
+}
+
+impl ArchiveReader {
+    pub fn new<P: AsRef<Path>>(path: P, mode: ReadMode) -> std::io::Result<Self> {
+        let filename = path.as_ref().display().to_string();
+        Ok(Self { mode, filename })
+    }
+
+    pub fn events<P: AsRef<Path>>(&self, path: P) -> std::io::Result<(Value, EventIterator)> {
+        let path = path.as_ref();
+        let mut file = File::open(path)?;
+
+        // Detect compression format
+        let mut magic_bytes = [0u8; 4];
+        let bytes_read = file.read(&mut magic_bytes)?;
+        let compression_format = detect_compression_format(path, &magic_bytes[..bytes_read]);
+
+        // Re-open file to reset position
+        file = File::open(path)?;
+
+        let mut diagnostics = DiagnosticCollector::new();
+
+        // Check if compression is detected but not supported
+        #[cfg(not(feature = "compression"))]
+        if compression_format != CompressionFormat::None {
+            let format_name = match compression_format {
+                CompressionFormat::Gzip => "gzip",
+                CompressionFormat::Deflate => "deflate",
+                CompressionFormat::Zlib => "zlib",
+                CompressionFormat::Brotli => "brotli",
+                CompressionFormat::Zstd => "zstd",
+                CompressionFormat::None => unreachable!(),
+            };
+            diagnostics.add(
+                Diagnostic::new(
+                    DiagnosticLevel::Fatal,
+                    DiagnosticCode::UnsupportedVersion,
+                    format!("I detected a {}-compressed archive, but this build doesn't support compression.", format_name)
+                )
+                .with_location(self.filename.clone(), 1)
+                .with_advice(
+                    "This binary was built without compression support to reduce binary size and dependencies.\n\
+                     You have two options:\n\
+                     1. Install the version with compression support: cargo install json-archive --features compression\n\
+                     2. Manually decompress the file first, then use this tool on the uncompressed archive"
+                        .to_string()
+                )
+            );
+            // Return dummy values with fatal diagnostic
+            let iterator = EventIterator {
+                reader: Box::new(BufReader::new(std::io::empty())),
+                diagnostics,
+                header: Header::new(Value::Null, None),
+                filename: self.filename.clone(),
+                line_number: 1,
+            };
+            return Ok((Value::Null, iterator));
+        }
+
+        // Create appropriate reader based on compression format
+        #[cfg(feature = "compression")]
+        let reader: Box<dyn BufRead> = match compression_format {
+            CompressionFormat::Gzip => Box::new(BufReader::new(GzDecoder::new(file))),
+            CompressionFormat::Deflate => Box::new(BufReader::new(DeflateDecoder::new(file))),
+            CompressionFormat::Zlib => Box::new(BufReader::new(ZlibDecoder::new(file))),
+            CompressionFormat::Brotli => Box::new(BufReader::new(Decompressor::new(file, 4096))),
+            CompressionFormat::Zstd => Box::new(BufReader::new(ZstdDecoder::new(file)?)),
+            CompressionFormat::None => Box::new(BufReader::new(file)),
+        };
+
+        #[cfg(not(feature = "compression"))]
+        let reader: Box<dyn BufRead> = Box::new(BufReader::new(file));
+
+        let mut reader = reader;
+        let mut header_line = String::new();
+        let _bytes_read = match reader.read_line(&mut header_line) {
+            Ok(0) => {
+                // Empty file
+                diagnostics.add(
+                    Diagnostic::new(
+                        DiagnosticLevel::Fatal,
+                        DiagnosticCode::EmptyFile,
+                        "I found an empty file, but I need at least a header line.".to_string(),
+                    )
+                    .with_location(self.filename.clone(), 1)
+                    .with_advice(
+                        "See the file format specification for header structure."
+                            .to_string(),
+                    ),
+                );
+                let iterator = EventIterator {
+                    reader,
+                    diagnostics,
+                    header: Header::new(Value::Null, None),
+                    filename: self.filename.clone(),
+                    line_number: 1,
+                };
+                return Ok((Value::Null, iterator));
+            }
+            Ok(n) => n,
+            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
+                // UTF-8 error
+                diagnostics.add(
+                    Diagnostic::new(
+                        DiagnosticLevel::Fatal,
+                        DiagnosticCode::InvalidUtf8,
+                        "I found invalid UTF-8 bytes at line 1.".to_string()
+                    )
+                    .with_location(self.filename.clone(), 1)
+                    .with_advice(
+                        "The JSON Archive format requires UTF-8 encoding. Make sure the file \
+                         was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
+                            .to_string()
+                    )
+                );
+                let iterator = EventIterator {
+                    reader,
+                    diagnostics,
+                    header: Header::new(Value::Null, None),
+                    filename: self.filename.clone(),
+                    line_number: 1,
+                };
+                return Ok((Value::Null, iterator));
+            }
+            Err(e) => return Err(e),
+        };
+
+        let header = match self.parse_header(&header_line, 1, &mut diagnostics) {
+            Some(h) => h,
+            None => {
+                let iterator = EventIterator {
+                    reader,
+                    diagnostics,
+                    header: Header::new(Value::Null, None),
+                    filename: self.filename.clone(),
+                    line_number: 1,
+                };
+                return Ok((Value::Null, iterator));
+            }
+        };
+
+        let iterator = EventIterator {
+            reader,
+            diagnostics,
+            header: header.clone(),
+            filename: self.filename.clone(),
+            line_number: 1,
+        };
+
+        Ok((header.initial, iterator))
+    }
+
+    pub fn read<P: AsRef<Path>>(&self, path: P) -> std::io::Result<ReadResult> {
+        let (initial_value, mut event_iter) = self.events(&path)?;
+
+        // Check for early fatal diagnostics (like compression not supported)
+        if event_iter.diagnostics.has_fatal() {
+            return Ok(ReadResult {
+                header: Header::new(Value::Null, None),
+                final_state: Value::Null,
+                diagnostics: event_iter.diagnostics,
+                observation_count: 0,
+            });
+        }
+
+        let header = Header::new(initial_value.clone(), None);
+        let mut state = initial_value;
+        let mut seen_observations: HashSet<String> = HashSet::new();
+        let mut current_observation: Option<(String, usize, usize)> = None;
+        let mut events_in_observation = 0;
+        let mut observation_count = 0;
+
+        // Process events from iterator
+        while let Some(event) = event_iter.next() {
+            let line_number = event_iter.line_number;
+
             match event {
                 Event::Observe { observation_id, timestamp: _, change_count } => {
                     if let Some((_obs_id, obs_line, expected_count)) = &current_observation {
                         if events_in_observation != *expected_count {
-                            diagnostics.add(
+                            event_iter.diagnostics.add(
                                 Diagnostic::new(
                                     DiagnosticLevel::Warning,
                                     DiagnosticCode::ChangeCountMismatch,
@@ -233,7 +392,7 @@ impl ArchiveReader {
                     }
 
                     if seen_observations.contains(&observation_id) {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Warning,
                                 DiagnosticCode::DuplicateObservationId,
@@ -260,14 +419,13 @@ impl ArchiveReader {
                     if self.mode == ReadMode::FullValidation
                         && !seen_observations.contains(&observation_id)
                     {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,
                                 format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
                             )
                             .with_location(self.filename.clone(), line_number)
-                            .with_snippet(format!("{} | {}", line_number, line))
                             .with_advice(
                                 "Each add/change/remove/move event must reference an observation ID from a preceding observe event."
                                     .to_string()
@@ -277,7 +435,7 @@ impl ArchiveReader {
                     }
 
                     if let Err(diag) = apply_add(&mut state, &path, value) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                         continue;
                     }
                 }
@@ -288,7 +446,7 @@ impl ArchiveReader {
                     if self.mode == ReadMode::FullValidation
                         && !seen_observations.contains(&observation_id)
                     {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,
@@ -300,7 +458,7 @@ impl ArchiveReader {
                     }
 
                     if let Err(diag) = apply_change(&mut state, &path, new_value) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                        continue;
                     }
                 }
@@ -311,7 +469,7 @@ impl ArchiveReader {
                     if self.mode == ReadMode::FullValidation
                         && !seen_observations.contains(&observation_id)
                     {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,
@@ -323,7 +481,7 @@ impl ArchiveReader {
                     }
 
                     if let Err(diag) = apply_remove(&mut state, &path) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                         continue;
                     }
                 }
@@ -334,7 +492,7 @@ impl ArchiveReader {
                    if self.mode == ReadMode::FullValidation
                         && !seen_observations.contains(&observation_id)
                     {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,
@@ -346,14 +504,14 @@ impl ArchiveReader {
                     }
 
                     if let Err(diag) = apply_move(&mut state, &path, moves) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                        continue;
                     }
                 }
 
                 Event::Snapshot { observation_id: _, timestamp: _, object } => {
                     if self.mode == ReadMode::FullValidation && state != object {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::SnapshotStateMismatch,
@@ -376,7 +534,7 @@ impl ArchiveReader {
 
         if let Some((_obs_id, obs_line, expected_count)) = &current_observation {
             if events_in_observation != *expected_count {
-                diagnostics.add(
+                event_iter.diagnostics.add(
                     Diagnostic::new(
                         DiagnosticLevel::Warning,
                         DiagnosticCode::ChangeCountMismatch,
@@ -393,10 +551,11 @@ impl ArchiveReader {
         Ok(ReadResult {
             header,
             final_state: state,
-            diagnostics,
+            diagnostics: event_iter.diagnostics,
            observation_count,
         })
     }
 
+
     fn parse_header(
         &self,
         line: &str,
@@ -470,7 +629,7 @@ impl ArchiveReader {
 }
 
-fn apply_add(state: &mut Value, path: &str, value: Value) -> Result<(), Diagnostic> {
+pub fn apply_add(state: &mut Value, path: &str, value: Value) -> Result<(), Diagnostic> {
     let pointer = JsonPointer::new(path).map_err(|diag| {
         diag.with_advice(
             "JSON Pointer paths must start with '/' and use '/' to separate segments.\n\
@@ -488,19 +647,19 @@ pub fn apply_add(state: &mut Value, path: &str, value: Value) -> Result<(), Diagnostic> {
     })
 }
 
-fn apply_change(state: &mut Value, path: &str, new_value: Value) -> Result<(), Diagnostic> {
+pub fn apply_change(state: &mut Value, path: &str, new_value: Value) -> Result<(), Diagnostic> {
     let pointer = JsonPointer::new(path)?;
     pointer.set(state, new_value)?;
     Ok(())
 }
 
-fn apply_remove(state: &mut Value, path: &str) -> Result<(), Diagnostic> {
+pub fn apply_remove(state: &mut Value, path: &str) -> Result<(), Diagnostic> {
     let pointer = JsonPointer::new(path)?;
     pointer.remove(state)?;
     Ok(())
 }
 
-fn apply_move(
+pub fn apply_move(
     state: &mut Value,
     path: &str,
     moves: Vec<(usize, usize)>,
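
Because `detect_compression_format` is a pure function over the first four bytes plus the path, its contract is easy to pin down in a test. The sketch below is hypothetical (not part of this commit) and assumes the function and `CompressionFormat` stay module-private as declared above, so a same-module test can reach them:

```rust
#[cfg(test)]
mod compression_detection_tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn sniffs_magic_bytes_and_extensions() {
        // Gzip: 0x1f 0x8b
        let gz = [0x1f, 0x8b, 0x08, 0x00];
        assert_eq!(detect_compression_format(Path::new("a.json.gz"), &gz), CompressionFormat::Gzip);

        // Zstd: 0x28 0xb5 0x2f 0xfd
        let zst = [0x28, 0xb5, 0x2f, 0xfd];
        assert_eq!(detect_compression_format(Path::new("a.json.zst"), &zst), CompressionFormat::Zstd);

        // Zlib: 0x78 followed by one of four flag bytes
        let zlib = [0x78, 0x9c, 0x00, 0x00];
        assert_eq!(detect_compression_format(Path::new("a.json"), &zlib), CompressionFormat::Zlib);

        // Brotli has no magic number, so detection falls back to the extension
        let opaque = [0x1b, 0x2e, 0x00, 0x00];
        assert_eq!(detect_compression_format(Path::new("a.json.br"), &opaque), CompressionFormat::Brotli);

        // Plain JSON matches no signature and is read uncompressed
        assert_eq!(detect_compression_format(Path::new("a.json"), b"{\"ty"), CompressionFormat::None);
    }
}
```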