feat: add reading compressed archive files

parent 07e604ac25
commit 1f0f41a96c

8 changed files with 552 additions and 272 deletions
119  Cargo.lock (generated)

@@ -2,6 +2,27 @@
 # It is not intended for manual editing.
 version = 4
 
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"

@@ -32,6 +53,27 @@ version = "2.9.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
 
+[[package]]
+name = "brotli"
+version = "8.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.19.0"

@@ -45,6 +87,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f"
 dependencies = [
  "find-msvc-tools",
+ "jobserver",
+ "libc",
  "shlex",
 ]
 

@@ -74,6 +118,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
 
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "derive_arbitrary"
 version = "1.4.2"

@@ -107,6 +160,16 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
 
+[[package]]
+name = "flate2"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.3.3"

@@ -149,6 +212,16 @@ version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
 
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom",
+ "libc",
+]
+
 [[package]]
 name = "js-sys"
 version = "0.3.81"

@@ -164,12 +237,15 @@ name = "json-archive"
 version = "0.99.0"
 dependencies = [
  "arbitrary",
+ "brotli",
  "chrono",
+ "flate2",
  "serde",
  "serde_json",
  "tempfile",
  "uuid",
  "xflags",
+ "zstd",
 ]
 
 [[package]]

@@ -196,6 +272,15 @@ version = "2.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
 
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.19"

@@ -211,6 +296,12 @@ version = "1.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.101"

@@ -516,3 +607,31 @@ name = "xflags-macros"
 version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155"
+
+[[package]]
+name = "zstd"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.16+zstd.1.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
Cargo.toml

@@ -3,6 +3,10 @@ name = "json-archive"
 version = "0.99.0"
 edition = "2021"
 
+[features]
+default = ["compression"]
+compression = ["flate2", "brotli", "zstd"]
+
 [dependencies]
 xflags = "0.3"
 serde = { version = "1.0", features = ["derive"] }

@@ -10,6 +14,11 @@ serde_json = "1.0"
 chrono = { version = "0.4", features = ["serde"] }
 uuid = { version = "1.0", features = ["v4", "serde"] }
 
+# Compression support (optional, enabled by default)
+flate2 = { version = "1.0", optional = true }
+brotli = { version = "8.0", optional = true }
+zstd = { version = "0.13", optional = true }
+
 [dev-dependencies]
 tempfile = "3.0"
 arbitrary = { version = "1.0", features = ["derive"] }
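The new [features] stanza is what makes the decompression crates optional. As a rough sketch of the cfg-gating pattern this enables inside the crate (illustrative only, with a hypothetical open_reader helper; the real reader in src/reader.rs reports a fatal diagnostic in minimal builds instead of passing bytes through):

```rust
use std::fs::File;
use std::io::{BufRead, BufReader};

// Full build: wrap the file in a streaming gzip decoder.
#[cfg(feature = "compression")]
fn open_reader(file: File) -> Box<dyn BufRead> {
    Box::new(BufReader::new(flate2::read::GzDecoder::new(file)))
}

// Minimal build: plain reads only; compressed inputs are rejected elsewhere.
#[cfg(not(feature = "compression"))]
fn open_reader(file: File) -> Box<dyn BufRead> {
    Box::new(BufReader::new(file))
}
```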
10  README.md

@@ -57,12 +57,20 @@ json-archive videoID.info.json
 
 ### Compression support (as a concession)
 
-While the core design keeps things simple and readable, the tool does work with compressed archives as a practical concession for those who need it. You can read from and write to gzip, brotli, and zlib compressed files without special flags.
+While the core design keeps things simple and readable, the tool does work with compressed archives as a practical concession for those who need it. You can read from and write to gzip, deflate, zlib, brotli, and zstd compressed files without special flags.
 
 **Important caveat**: Compressed archives may require rewriting the entire file during updates (depending on the compression format). If your temporary filesystem is full or too small, updates can fail. In that case, manually specify an output destination with `-o` to write the new archive elsewhere.
 
 This works fine for the happy path with archive files up to a few hundred megabytes, but contradicts the "keep it simple" design philosophy - it's included because it's practically useful.
 
+**Building without compression**: Compression libraries are a security vulnerability vector. The default build includes them because most users want convenience. If you don't want to bundle compression libraries:
+
+```bash
+cargo install json-archive --no-default-features
+```
+
+The minimal build detects compressed files and errors with a clear message explaining you need the full version or manual decompression.
+
 ## Archive format
 
 The format is JSONL with delta-based changes using [JSON Pointer](https://tools.ietf.org/html/rfc6901) paths. For complete technical details about the file format, see the [file format specification](docs/file-format-spec.md).
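Since format detection is by content, the compressed demo archives added in this commit can be inspected directly. A hypothetical session (assuming the `info` subcommand wired up in src/cmd/info.rs; output omitted):

```bash
# Format is sniffed from magic bytes, so the file extension doesn't matter.
json-archive info docs/demo/v1.json.archive.gz
json-archive info docs/demo/v1.json.archive.zst
```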
BIN  docs/demo/v1.json.archive.gz (new file, binary file not shown)
BIN  docs/demo/v1.json.archive.zst (new file, binary file not shown)
233  src/cmd/info.rs

@@ -21,11 +21,8 @@
 
 use crate::flags;
 use chrono::{DateTime, Utc};
-use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel};
+use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
 use serde::Serialize;
-use serde_json::Value;
-use std::fs::File;
-use std::io::{BufRead, BufReader};
 use std::path::Path;
 
 #[derive(Debug)]

@@ -53,6 +50,8 @@ struct JsonInfoOutput {
     file_size: u64,
     snapshot_count: usize,
     observations: Vec<JsonObservation>,
+    total_json_size: u64,
+    efficiency_percent: f64,
 }
 
 pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {

@@ -69,8 +68,8 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
         )];
     }
 
-    let observations = match collect_observations(&flags.file) {
-        Ok(obs) => obs,
+    let (observations, snapshot_count) = match collect_observations(&flags.file) {
+        Ok((obs, count)) => (obs, count),
         Err(diagnostics) => return diagnostics,
     };
 

@@ -79,7 +78,15 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
         Err(_) => 0,
     };
 
-    let snapshot_count = count_snapshots(&flags.file).unwrap_or(0);
+    // Calculate total JSON size (sum of all observations + newline separators)
+    let total_json_size: u64 = observations.iter().map(|obs| obs.json_size as u64).sum::<u64>()
+        + (observations.len() as u64).saturating_sub(1); // Add newlines between observations
+
+    let efficiency_percent = if total_json_size > 0 {
+        (file_size as f64 / total_json_size as f64) * 100.0
+    } else {
+        0.0
+    };
 
     // Check output format
     let is_json_output = flags.output.as_ref().map(|s| s == "json").unwrap_or(false);

@@ -93,6 +100,8 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             file_size,
             snapshot_count,
             observations: Vec::new(),
+            total_json_size: 0,
+            efficiency_percent: 0.0,
         };
         println!(
             "{}",

@@ -123,6 +132,8 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             file_size,
             snapshot_count,
             observations: json_observations,
+            total_json_size,
+            efficiency_percent,
         };
 
         println!(

@@ -193,10 +204,22 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     } else {
         format!("{} snapshots", snapshot_count)
     };
 
+    let comparison = if efficiency_percent < 100.0 {
+        format!("{:.1}% smaller", 100.0 - efficiency_percent)
+    } else {
+        format!("{:.1}% larger", efficiency_percent - 100.0)
+    };
+
     println!(
-        "Total archive size: {} ({})",
+        "Archive size: {} ({}, {} than JSON Lines)",
         format_size(file_size),
-        snapshot_text
+        snapshot_text,
+        comparison
     );
+    println!(
+        "Data size: {}",
+        format_size(total_json_size)
+    );
 
     // Add usage instructions

@@ -222,9 +245,9 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     Vec::new()
 }
 
-fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
-    let file = match File::open(file_path) {
-        Ok(f) => f,
+fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
+    let reader = match json_archive::ArchiveReader::new(file_path, json_archive::ReadMode::AppendSeek) {
+        Ok(r) => r,
         Err(e) => {
             return Err(vec![Diagnostic::new(
                 DiagnosticLevel::Fatal,

@@ -234,44 +257,32 @@ fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
         }
     };
 
-    let reader = BufReader::new(file);
-    let mut lines = reader.lines();
-    let mut observations = Vec::new();
-
-    // Parse header
-    let header_line = match lines.next() {
-        Some(Ok(line)) => line,
-        _ => {
-            return Err(vec![Diagnostic::new(
-                DiagnosticLevel::Fatal,
-                DiagnosticCode::EmptyFile,
-                "Archive file is empty or unreadable".to_string(),
-            )]);
-        }
-    };
-
-    let header: Value = match serde_json::from_str(&header_line) {
-        Ok(h) => h,
+    let (initial_state, mut event_iter) = match reader.events(file_path) {
+        Ok(r) => r,
         Err(e) => {
             return Err(vec![Diagnostic::new(
                 DiagnosticLevel::Fatal,
-                DiagnosticCode::MissingHeader,
-                format!("I couldn't parse the header: {}", e),
+                DiagnosticCode::PathNotFound,
+                format!("I couldn't read the archive file: {}", e),
             )]);
         }
     };
 
-    let created_str = header["created"].as_str().unwrap_or("");
-    let created: DateTime<Utc> = match created_str.parse() {
-        Ok(dt) => dt,
-        Err(_) => Utc::now(),
-    };
+    // Check for fatal diagnostics from initial parsing
+    if event_iter.diagnostics.has_fatal() {
+        return Err(event_iter.diagnostics.diagnostics().to_vec());
+    }
+
+    let mut observations = Vec::new();
+    let mut current_state = initial_state.clone();
+    let mut snapshot_count = 0;
 
-    let initial_state = header["initial"].clone();
     let initial_size = serde_json::to_string(&initial_state)
         .unwrap_or_default()
         .len();
 
+    let created = event_iter.header.created;
+
     // Add initial state as observation 0
     observations.push(ObservationInfo {
         id: "initial".to_string(),

@@ -281,54 +292,73 @@ fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
         json_size: initial_size,
     });
 
-    let mut current_state = initial_state;
-
-    // Parse events
-    for line in lines {
-        let line = match line {
-            Ok(l) => l,
-            Err(_) => continue,
-        };
-
-        if line.trim().starts_with('#') || line.trim().is_empty() {
-            continue;
-        }
-
-        let event: Value = match serde_json::from_str(&line) {
-            Ok(e) => e,
-            Err(_) => continue,
-        };
-
-        if let Some(arr) = event.as_array() {
-            if arr.is_empty() {
-                continue;
-            }
-
-            let event_type = arr[0].as_str().unwrap_or("");
-
-            if event_type == "observe" && arr.len() >= 4 {
-                let obs_id = arr[1].as_str().unwrap_or("").to_string();
-                let timestamp_str = arr[2].as_str().unwrap_or("");
-                let change_count = arr[3].as_u64().unwrap_or(0) as usize;
-
-                let timestamp: DateTime<Utc> = match timestamp_str.parse() {
-                    Ok(dt) => dt,
-                    Err(_) => continue,
-                };
-
+    // Iterate through events
+    while let Some(event) = event_iter.next() {
+        match event {
+            Event::Observe { observation_id, timestamp, change_count } => {
                 observations.push(ObservationInfo {
-                    id: obs_id,
+                    id: observation_id,
                     timestamp,
                     created,
                     change_count,
                     json_size: 0, // Will be calculated after applying events
                 });
-            } else {
-                // Apply the event to current_state for size calculation
-                apply_event_to_state(&mut current_state, &arr);
+            }
+            Event::Add { path, value, .. } => {
+                let _ = json_archive::apply_add(&mut current_state, &path, value);
 
                 // Update the JSON size of the last observation
                 if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Change { path, new_value, .. } => {
+                let _ = json_archive::apply_change(&mut current_state, &path, new_value);
+
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Remove { path, .. } => {
+                let _ = json_archive::apply_remove(&mut current_state, &path);
+
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Move { path, moves, .. } => {
+                let _ = json_archive::apply_move(&mut current_state, &path, moves);
+
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
+                        last_obs.json_size = serde_json::to_string(&current_state)
+                            .unwrap_or_default()
+                            .len();
+                    }
+                }
+            }
+            Event::Snapshot { object, .. } => {
+                current_state = object;
+                snapshot_count += 1;
+
+                // Update the JSON size of the last observation
+                if let Some(last_obs) = observations.last_mut() {
+                    if last_obs.id != "initial" {
                         last_obs.json_size = serde_json::to_string(&current_state)
                             .unwrap_or_default()
                             .len();

@@ -336,41 +366,11 @@ fn collect_observations(file_path: &Path) -> Result<Vec<ObservationInfo>, Vec<Diagnostic>> {
                     }
                 }
             }
         }
+    }
 
-    Ok(observations)
+    Ok((observations, snapshot_count))
 }
 
-fn apply_event_to_state(state: &mut Value, event: &[Value]) {
-    if event.is_empty() {
-        return;
-    }
-
-    let event_type = event[0].as_str().unwrap_or("");
-
-    match event_type {
-        "add" if event.len() >= 3 => {
-            let path = event[1].as_str().unwrap_or("");
-            let value = event[2].clone();
-            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
-                let _ = pointer.set(state, value);
-            }
-        }
-        "change" if event.len() >= 3 => {
-            let path = event[1].as_str().unwrap_or("");
-            let value = event[2].clone();
-            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
-                let _ = pointer.set(state, value);
-            }
-        }
-        "remove" if event.len() >= 2 => {
-            let path = event[1].as_str().unwrap_or("");
-            if let Ok(pointer) = json_archive::pointer::JsonPointer::new(path) {
-                let _ = pointer.remove(state);
-            }
-        }
-        _ => {}
-    }
-}
-
 fn format_timestamp(dt: &DateTime<Utc>) -> String {
     dt.format("%a %H:%M:%S %d-%b-%Y").to_string()

@@ -393,18 +393,3 @@ fn format_size(bytes: u64) -> String {
         format!("{:.1} MB", bytes as f64 / (1024.0 * 1024.0))
     }
 }
-
-fn count_snapshots(file_path: &Path) -> Result<usize, std::io::Error> {
-    let file = File::open(file_path)?;
-    let reader = BufReader::new(file);
-    let mut count = 0;
-
-    for line in reader.lines() {
-        let line = line?;
-        if line.trim().starts_with('[') && line.contains("\"snapshot\"") {
-            count += 1;
-        }
-    }
-
-    Ok(count)
-}
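A quick worked example of the new efficiency math in run() above, with invented sizes: an archive that is 40,000 bytes on disk whose observations re-serialize to 100,000 bytes of JSON Lines gives efficiency_percent = 40.0, which the comparison branch reports as "60.0% smaller".

```rust
fn main() {
    // Hypothetical sizes, mirroring the arithmetic in run() above.
    let file_size: u64 = 40_000;        // archive bytes on disk
    let total_json_size: u64 = 100_000; // all observations serialized + newlines
    let efficiency_percent = (file_size as f64 / total_json_size as f64) * 100.0;
    assert_eq!(efficiency_percent, 40.0);
    // Below 100.0, so the tool prints "60.0% smaller" than JSON Lines.
}
```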
src/lib.rs

@@ -36,4 +36,4 @@ pub use detection::is_json_archive;
 pub use diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
 pub use events::{Event, Header, Observation};
 pub use pointer::JsonPointer;
-pub use reader::{ArchiveReader, ReadMode, ReadResult};
+pub use reader::{apply_add, apply_change, apply_move, apply_remove, ArchiveReader, ReadMode, ReadResult};
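With apply_add and friends now public alongside ArchiveReader, callers outside the crate can drive the streaming path themselves. A sketch under the signatures shown in this diff (hypothetical replay function, not part of the commit):

```rust
use json_archive::{apply_add, ArchiveReader, Event, ReadMode};
use std::path::Path;

fn replay(path: &Path) -> std::io::Result<()> {
    let reader = ArchiveReader::new(path, ReadMode::AppendSeek)?;
    // events() returns the initial state plus a streaming event iterator;
    // compressed inputs are decoded transparently.
    let (mut state, mut events) = reader.events(path)?;
    while let Some(event) = events.next() {
        if let Event::Add { path, value, .. } = event {
            // The apply_* helpers mutate the accumulated state in place.
            let _ = apply_add(&mut state, &path, value);
        }
    }
    // Any problems encountered along the way land in events.diagnostics.
    Ok(())
}
```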
445  src/reader.rs

@@ -22,7 +22,7 @@
 use serde_json::Value;
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::{BufRead, BufReader};
+use std::io::{BufRead, BufReader, Read};
 use std::path::Path;
 
 use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};

@@ -30,12 +30,29 @@ use crate::event_deserialize::EventDeserializer;
 use crate::events::{Event, Header};
 use crate::pointer::JsonPointer;
 
+#[cfg(feature = "compression")]
+use flate2::read::{DeflateDecoder, GzDecoder, ZlibDecoder};
+#[cfg(feature = "compression")]
+use brotli::Decompressor;
+#[cfg(feature = "compression")]
+use zstd::stream::read::Decoder as ZstdDecoder;
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ReadMode {
     FullValidation,
     AppendSeek,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum CompressionFormat {
+    Gzip,
+    Deflate,
+    Zlib,
+    Brotli,
+    Zstd,
+    None,
+}
+
 pub struct ArchiveReader {
     mode: ReadMode,
     filename: String,

@@ -49,134 +66,46 @@ pub struct ReadResult {
     pub observation_count: usize,
 }
 
-impl ArchiveReader {
-    pub fn new<P: AsRef<Path>>(path: P, mode: ReadMode) -> std::io::Result<Self> {
-        let filename = path.as_ref().display().to_string();
-        Ok(Self { mode, filename })
-    }
+pub struct EventIterator {
+    reader: Box<dyn BufRead>,
+    pub diagnostics: DiagnosticCollector,
+    pub header: Header,
+    filename: String,
+    line_number: usize,
+}
 
-    pub fn read<P: AsRef<Path>>(&self, path: P) -> std::io::Result<ReadResult> {
-        let file = File::open(path)?;
-        let reader = BufReader::new(file);
-        let mut diagnostics = DiagnosticCollector::new();
+impl Iterator for EventIterator {
+    type Item = Event;
 
-        let mut lines_iter = reader.lines().enumerate();
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut line = String::new();
 
-        let (header_line_number, header_line) = match lines_iter.next() {
-            Some((idx, Ok(line))) => (idx + 1, line),
-            Some((idx, Err(e))) if e.kind() == std::io::ErrorKind::InvalidData => {
-                let line_number = idx + 1;
-                diagnostics.add(
-                    Diagnostic::new(
-                        DiagnosticLevel::Fatal,
-                        DiagnosticCode::InvalidUtf8,
-                        format!("I found invalid UTF-8 bytes at line {}.", line_number)
-                    )
-                    .with_location(self.filename.clone(), line_number)
-                    .with_advice(
-                        "The JSON Archive format requires UTF-8 encoding. Make sure the file \
-                         was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
-                            .to_string()
-                    )
-                );
-                return Ok(ReadResult {
-                    header: Header::new(Value::Null, None),
-                    final_state: Value::Null,
-                    diagnostics,
-                    observation_count: 0,
-                });
-            }
-            Some((_, Err(e))) => return Err(e),
-            None => {
-                diagnostics.add(
-                    Diagnostic::new(
-                        DiagnosticLevel::Fatal,
-                        DiagnosticCode::EmptyFile,
-                        "I found an empty file, but I need at least a header line.".to_string(),
-                    )
-                    .with_location(self.filename.clone(), 1)
-                    .with_advice(
-                        "A valid JSON Archive file must start with a header object containing:\n\
-                         - type: \"@peoplesgrocers/json-archive\"\n\
-                         - version: 1\n\
-                         - created: an ISO-8601 timestamp\n\
-                         - initial: the initial state of the object"
-                            .to_string(),
-                    ),
-                );
-                return Ok(ReadResult {
-                    header: Header::new(Value::Null, None),
-                    final_state: Value::Null,
-                    diagnostics,
-                    observation_count: 0,
-                });
-            }
-        };
+        loop {
+            line.clear();
+            self.line_number += 1;
 
-        let header = match self.parse_header(&header_line, header_line_number, &mut diagnostics) {
-            Some(h) => h,
-            None => {
-                return Ok(ReadResult {
-                    header: Header::new(Value::Null, None),
-                    final_state: Value::Null,
-                    diagnostics,
-                    observation_count: 0,
-                });
-            }
-        };
-
-        let mut state = header.initial.clone();
-        let mut seen_observations: HashSet<String> = HashSet::new();
-        let mut current_observation: Option<(String, usize, usize)> = None;
-        let mut events_in_observation = 0;
-        let mut observation_count = 0;
-
-        // This manual dispatcher mirrors what serde would expand but stays explicit so we can
-        // attach Elm-style diagnostics with precise spans and guidance for each failure case.
-        for (idx, line_result) in lines_iter {
-            let line_number = idx + 1;
-            let line = match line_result {
-                Ok(line) => line,
-                Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
-                    diagnostics.add(
-                        Diagnostic::new(
-                            DiagnosticLevel::Fatal,
-                            DiagnosticCode::InvalidUtf8,
-                            format!("I found invalid UTF-8 bytes at line {}.", line_number)
-                        )
-                        .with_location(self.filename.clone(), line_number)
-                        .with_advice(
-                            "The JSON Archive format requires UTF-8 encoding. Make sure the file \
-                             was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
-                                .to_string()
-                        )
-                    );
-                    return Ok(ReadResult {
-                        header: Header::new(Value::Null, None),
-                        final_state: Value::Null,
-                        diagnostics,
-                        observation_count: 0,
-                    });
-                }
-                Err(e) => return Err(e),
-            };
-
+            match self.reader.read_line(&mut line) {
+                Ok(0) => return None, // EOF
+                Ok(_) => {
                     let trimmed = line.trim();
 
+                    // Skip comments and blank lines
                     if trimmed.starts_with('#') || trimmed.is_empty() {
                         continue;
                     }
 
+                    // Try to parse as event
                     let event_deserializer = match serde_json::from_str::<EventDeserializer>(&line) {
                         Ok(d) => d,
                         Err(e) => {
-            diagnostics.add(
+                            self.diagnostics.add(
                                 Diagnostic::new(
                                     DiagnosticLevel::Fatal,
                                     DiagnosticCode::InvalidEventJson,
                                     format!("I couldn't parse this line as JSON: {}", e),
                                 )
-                .with_location(self.filename.clone(), line_number)
-                .with_snippet(format!("{} | {}", line_number, line))
+                                .with_location(self.filename.clone(), self.line_number)
+                                .with_snippet(format!("{} | {}", self.line_number, line.trim()))
                                 .with_advice(
                                     "Each line after the header must be either:\n\
                                      - A comment starting with #\n\

@@ -189,31 +118,261 @@ impl ArchiveReader {
                             }
                         };
 
-            // Add any diagnostics from deserialization with location info
+                        // Add any diagnostics from deserialization
                         for diagnostic in event_deserializer.diagnostics {
-                diagnostics.add(
+                            self.diagnostics.add(
                                 diagnostic
-                    .with_location(self.filename.clone(), line_number)
-                    .with_snippet(format!("{} | {}", line_number, line))
+                                    .with_location(self.filename.clone(), self.line_number)
+                                    .with_snippet(format!("{} | {}", self.line_number, line.trim()))
                             );
                         }
 
-            // Continue processing to collect additional errors before failing.
-            // Even though this function must now return an error, we continue to help
-            // the user identify all issues in the file at once rather than one at a time.
-            let event = match event_deserializer.event {
-                Some(e) => e,
-                None => {
-                    assert!(diagnostics.has_fatal(), "Expected a fatal diagnostic when deserialization fails");
-                    continue
-                },
-            };
+                        // Return event if we have one
+                        if let Some(event) = event_deserializer.event {
+                            return Some(event);
+                        }
+
+                        // If no event but had diagnostics, continue to next line
+                        continue;
+                    }
+                    Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
+                        self.diagnostics.add(
+                            Diagnostic::new(
+                                DiagnosticLevel::Fatal,
+                                DiagnosticCode::InvalidUtf8,
+                                format!("I found invalid UTF-8 bytes at line {}.", self.line_number)
+                            )
+                            .with_location(self.filename.clone(), self.line_number)
+                            .with_advice(
+                                "The JSON Archive format requires UTF-8 encoding. Make sure the file \
+                                 was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
+                                    .to_string()
+                            )
+                        );
+                        return None;
+                    }
+                    Err(_) => return None,
+                }
+            }
+        }
+}
+
+fn detect_compression_format(path: &Path, bytes: &[u8]) -> CompressionFormat {
+    if bytes.len() < 4 {
+        return CompressionFormat::None;
+    }
+
+    // Gzip magic number: 0x1f 0x8b
+    if bytes[0] == 0x1f && bytes[1] == 0x8b {
+        return CompressionFormat::Gzip;
+    }
+
+    // Zlib magic number: 0x78 followed by 0x01, 0x5e, 0x9c, or 0xda
+    if bytes[0] == 0x78 && (bytes[1] == 0x01 || bytes[1] == 0x5e || bytes[1] == 0x9c || bytes[1] == 0xda) {
+        return CompressionFormat::Zlib;
+    }
+
+    // Zstd magic number: 0x28 0xb5 0x2f 0xfd
+    if bytes.len() >= 4 && bytes[0] == 0x28 && bytes[1] == 0xb5 && bytes[2] == 0x2f && bytes[3] == 0xfd {
+        return CompressionFormat::Zstd;
+    }
+
+    // Check file extension for brotli (no reliable magic number) and deflate
+    if let Some(ext) = path.extension() {
+        let ext_str = ext.to_string_lossy();
+        if ext_str == "br" || path.to_string_lossy().contains(".br.") {
+            return CompressionFormat::Brotli;
+        }
+        if ext_str == "deflate" {
+            return CompressionFormat::Deflate;
+        }
+    }
+
+    CompressionFormat::None
+}
+
+impl ArchiveReader {
+    pub fn new<P: AsRef<Path>>(path: P, mode: ReadMode) -> std::io::Result<Self> {
+        let filename = path.as_ref().display().to_string();
+        Ok(Self { mode, filename })
+    }
+
+    pub fn events<P: AsRef<Path>>(&self, path: P) -> std::io::Result<(Value, EventIterator)> {
+        let path = path.as_ref();
+        let mut file = File::open(path)?;
+
+        // Detect compression format
+        let mut magic_bytes = [0u8; 4];
+        let bytes_read = file.read(&mut magic_bytes)?;
+        let compression_format = detect_compression_format(path, &magic_bytes[..bytes_read]);
+
+        // Re-open file to reset position
+        file = File::open(path)?;
+
+        let mut diagnostics = DiagnosticCollector::new();
+
+        // Check if compression is detected but not supported
+        #[cfg(not(feature = "compression"))]
+        if compression_format != CompressionFormat::None {
+            let format_name = match compression_format {
+                CompressionFormat::Gzip => "gzip",
+                CompressionFormat::Deflate => "deflate",
+                CompressionFormat::Zlib => "zlib",
+                CompressionFormat::Brotli => "brotli",
+                CompressionFormat::Zstd => "zstd",
+                CompressionFormat::None => unreachable!(),
+            };
+
+            diagnostics.add(
+                Diagnostic::new(
+                    DiagnosticLevel::Fatal,
+                    DiagnosticCode::UnsupportedVersion,
+                    format!("I detected a {}-compressed archive, but this build doesn't support compression.", format_name)
+                )
+                .with_location(self.filename.clone(), 1)
+                .with_advice(
+                    "This binary was built without compression support to reduce binary size and dependencies.\n\
+                     You have two options:\n\
+                     1. Install the version with compression support: cargo install json-archive --features compression\n\
+                     2. Manually decompress the file first, then use this tool on the uncompressed archive"
+                        .to_string()
+                )
+            );
+
+            // Return dummy values with fatal diagnostic
+            let iterator = EventIterator {
+                reader: Box::new(BufReader::new(std::io::empty())),
+                diagnostics,
+                header: Header::new(Value::Null, None),
+                filename: self.filename.clone(),
+                line_number: 1,
+            };
+            return Ok((Value::Null, iterator));
+        }
+
+        // Create appropriate reader based on compression format
+        #[cfg(feature = "compression")]
+        let reader: Box<dyn BufRead> = match compression_format {
+            CompressionFormat::Gzip => Box::new(BufReader::new(GzDecoder::new(file))),
+            CompressionFormat::Deflate => Box::new(BufReader::new(DeflateDecoder::new(file))),
+            CompressionFormat::Zlib => Box::new(BufReader::new(ZlibDecoder::new(file))),
+            CompressionFormat::Brotli => Box::new(BufReader::new(Decompressor::new(file, 4096))),
+            CompressionFormat::Zstd => Box::new(BufReader::new(ZstdDecoder::new(file)?)),
+            CompressionFormat::None => Box::new(BufReader::new(file)),
+        };
+
+        #[cfg(not(feature = "compression"))]
+        let reader: Box<dyn BufRead> = Box::new(BufReader::new(file));
+
+        let mut reader = reader;
+        let mut header_line = String::new();
+
+        let _bytes_read = match reader.read_line(&mut header_line) {
+            Ok(0) => {
+                // Empty file
+                diagnostics.add(
+                    Diagnostic::new(
+                        DiagnosticLevel::Fatal,
+                        DiagnosticCode::EmptyFile,
+                        "I found an empty file, but I need at least a header line.".to_string(),
+                    )
+                    .with_location(self.filename.clone(), 1)
+                    .with_advice(
+                        "See the file format specification for header structure."
+                            .to_string(),
+                    ),
+                );
+                let iterator = EventIterator {
+                    reader,
+                    diagnostics,
+                    header: Header::new(Value::Null, None),
+                    filename: self.filename.clone(),
+                    line_number: 1,
+                };
+                return Ok((Value::Null, iterator));
+            }
+            Ok(n) => n,
+            Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
+                // UTF-8 error
+                diagnostics.add(
+                    Diagnostic::new(
+                        DiagnosticLevel::Fatal,
+                        DiagnosticCode::InvalidUtf8,
+                        "I found invalid UTF-8 bytes at line 1.".to_string()
+                    )
+                    .with_location(self.filename.clone(), 1)
+                    .with_advice(
+                        "The JSON Archive format requires UTF-8 encoding. Make sure the file \
+                         was saved with UTF-8 encoding, not Latin-1, Windows-1252, or another encoding."
+                            .to_string()
+                    )
+                );
+                let iterator = EventIterator {
+                    reader,
+                    diagnostics,
+                    header: Header::new(Value::Null, None),
+                    filename: self.filename.clone(),
+                    line_number: 1,
+                };
+                return Ok((Value::Null, iterator));
+            }
+            Err(e) => return Err(e),
+        };
+
+        let header = match self.parse_header(&header_line, 1, &mut diagnostics) {
+            Some(h) => h,
+            None => {
+                let iterator = EventIterator {
+                    reader,
+                    diagnostics,
+                    header: Header::new(Value::Null, None),
+                    filename: self.filename.clone(),
+                    line_number: 1,
+                };
+                return Ok((Value::Null, iterator));
+            }
+        };
+
+        let iterator = EventIterator {
+            reader,
+            diagnostics,
+            header: header.clone(),
+            filename: self.filename.clone(),
+            line_number: 1,
+        };
+
+        Ok((header.initial, iterator))
+    }
+
+    pub fn read<P: AsRef<Path>>(&self, path: P) -> std::io::Result<ReadResult> {
+        let (initial_value, mut event_iter) = self.events(&path)?;
+
+        // Check for early fatal diagnostics (like compression not supported)
+        if event_iter.diagnostics.has_fatal() {
+            return Ok(ReadResult {
+                header: Header::new(Value::Null, None),
+                final_state: Value::Null,
+                diagnostics: event_iter.diagnostics,
+                observation_count: 0,
+            });
+        }
+
+        let header = Header::new(initial_value.clone(), None);
+        let mut state = initial_value;
+        let mut seen_observations: HashSet<String> = HashSet::new();
+        let mut current_observation: Option<(String, usize, usize)> = None;
+        let mut events_in_observation = 0;
+        let mut observation_count = 0;
+
+        // Process events from iterator
+        while let Some(event) = event_iter.next() {
+            let line_number = event_iter.line_number;
+
             match event {
                 Event::Observe { observation_id, timestamp: _, change_count } => {
                     if let Some((_obs_id, obs_line, expected_count)) = &current_observation {
                         if events_in_observation != *expected_count {
-                            diagnostics.add(
+                            event_iter.diagnostics.add(
                                 Diagnostic::new(
                                     DiagnosticLevel::Warning,
                                     DiagnosticCode::ChangeCountMismatch,

@@ -233,7 +392,7 @@
                     }
 
                     if seen_observations.contains(&observation_id) {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Warning,
                                 DiagnosticCode::DuplicateObservationId,

@@ -260,14 +419,13 @@
                     if self.mode == ReadMode::FullValidation
                         && !seen_observations.contains(&observation_id)
                     {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,
                                 format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
                             )
                             .with_location(self.filename.clone(), line_number)
-                            .with_snippet(format!("{} | {}", line_number, line))
                             .with_advice(
                                 "Each add/change/remove/move event must reference an observation ID from a preceding observe event."
                                     .to_string()

@@ -277,7 +435,7 @@
                     }
 
                     if let Err(diag) = apply_add(&mut state, &path, value) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                         continue;
                     }
                 }

@@ -288,7 +446,7 @@
                     if self.mode == ReadMode::FullValidation
                         && !seen_observations.contains(&observation_id)
                     {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,

@@ -300,7 +458,7 @@
                     }
 
                     if let Err(diag) = apply_change(&mut state, &path, new_value) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                         continue;
                     }
                 }

@@ -311,7 +469,7 @@
                    if self.mode == ReadMode::FullValidation
                        && !seen_observations.contains(&observation_id)
                    {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,

@@ -323,7 +481,7 @@
                     }
 
                     if let Err(diag) = apply_remove(&mut state, &path) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                         continue;
                     }
                 }

@@ -334,7 +492,7 @@
                    if self.mode == ReadMode::FullValidation
                        && !seen_observations.contains(&observation_id)
                    {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::NonExistentObservationId,

@@ -346,14 +504,14 @@
                     }
 
                     if let Err(diag) = apply_move(&mut state, &path, moves) {
-                        diagnostics.add(diag.with_location(self.filename.clone(), line_number));
+                        event_iter.diagnostics.add(diag.with_location(self.filename.clone(), line_number));
                         continue;
                     }
                 }
 
                 Event::Snapshot { observation_id: _, timestamp: _, object } => {
                     if self.mode == ReadMode::FullValidation && state != object {
-                        diagnostics.add(
+                        event_iter.diagnostics.add(
                             Diagnostic::new(
                                 DiagnosticLevel::Fatal,
                                 DiagnosticCode::SnapshotStateMismatch,

@@ -376,7 +534,7 @@
 
            if let Some((_obs_id, obs_line, expected_count)) = &current_observation {
                if events_in_observation != *expected_count {
-                    diagnostics.add(
+                    event_iter.diagnostics.add(
                         Diagnostic::new(
                             DiagnosticLevel::Warning,
                             DiagnosticCode::ChangeCountMismatch,

@@ -393,10 +551,11 @@
        Ok(ReadResult {
            header,
            final_state: state,
-            diagnostics,
+            diagnostics: event_iter.diagnostics,
            observation_count,
        })
    }
 
    fn parse_header(
        &self,
        line: &str,

@@ -470,7 +629,7 @@
 }
 
-fn apply_add(state: &mut Value, path: &str, value: Value) -> Result<(), Diagnostic> {
+pub fn apply_add(state: &mut Value, path: &str, value: Value) -> Result<(), Diagnostic> {
     let pointer = JsonPointer::new(path).map_err(|diag| {
         diag.with_advice(
             "JSON Pointer paths must start with '/' and use '/' to separate segments.\n\

@@ -488,19 +647,19 @@
     })
 }
 
-fn apply_change(state: &mut Value, path: &str, new_value: Value) -> Result<(), Diagnostic> {
+pub fn apply_change(state: &mut Value, path: &str, new_value: Value) -> Result<(), Diagnostic> {
     let pointer = JsonPointer::new(path)?;
     pointer.set(state, new_value)?;
     Ok(())
 }
 
-fn apply_remove(state: &mut Value, path: &str) -> Result<(), Diagnostic> {
+pub fn apply_remove(state: &mut Value, path: &str) -> Result<(), Diagnostic> {
     let pointer = JsonPointer::new(path)?;
     pointer.remove(state)?;
     Ok(())
 }
 
-fn apply_move(
+pub fn apply_move(
     state: &mut Value,
     path: &str,
     moves: Vec<(usize, usize)>,
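One detail of detect_compression_format worth calling out: content sniffing runs before the extension checks, so a gzip archive renamed to .txt is still decoded, while brotli and raw deflate (which have no reliable magic bytes) fall back to the file name. A hypothetical in-module test of those rules (not part of the commit):

```rust
#[test]
fn sniffs_magic_bytes_then_falls_back_to_extension() {
    use std::path::Path;
    let p = Path::new("data.json.archive"); // name is irrelevant when magic matches
    assert_eq!(detect_compression_format(p, &[0x1f, 0x8b, 0x08, 0x00]), CompressionFormat::Gzip);
    assert_eq!(detect_compression_format(p, &[0x78, 0x9c, 0x00, 0x00]), CompressionFormat::Zlib);
    assert_eq!(detect_compression_format(p, &[0x28, 0xb5, 0x2f, 0xfd]), CompressionFormat::Zstd);
    // No magic number for brotli, so the ".br" extension decides.
    assert_eq!(detect_compression_format(Path::new("a.br"), b"json"), CompressionFormat::Brotli);
    // Plain JSON text matches nothing and reads uncompressed.
    assert_eq!(detect_compression_format(p, b"{\"ty"), CompressionFormat::None);
}
```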