feat: transparently append to compressed archives
When appending to a compressed archive (gzip, brotli, zstd), the tool now handles compression automatically. Since some compression formats can't be appended to in place, we write a new compressed file containing all the data and atomically rename it over the original (assuming the filesystem has enough free space). This means you can work with compressed archives the same way as uncompressed ones: point the tool at your .json.gz file and append values, with no manual decompression or recompression needed.
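For the gzip case, the flow looks roughly like the sketch below, built on the flate2 crate the project already depends on. The function name and error handling are illustrative only, not the tool's internal API; brotli and zstd follow the same decompress, rewrite, atomically-rename pattern.

```rust
// Illustrative sketch only: `rewrite_gzip_archive` is a hypothetical name,
// not the tool's actual API. Shown for the gzip case using the flate2 crate.
use std::fs::{self, File};
use std::io::{self, Read, Write};

use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;

/// Append `new_lines` to a gzip-compressed JSONL archive by rewriting it:
/// decompress the old contents, write old + new data into a freshly
/// compressed temporary file, then atomically rename it over the original.
fn rewrite_gzip_archive(path: &str, new_lines: &[String]) -> io::Result<()> {
    // 1. Decompress the existing archive into memory.
    let mut existing = String::new();
    GzDecoder::new(File::open(path)?).read_to_string(&mut existing)?;

    // 2. Write old + new contents into a temporary file next to the original.
    let tmp_path = format!("{}.tmp", path);
    let mut encoder = GzEncoder::new(File::create(&tmp_path)?, Compression::default());
    encoder.write_all(existing.as_bytes())?;
    for line in new_lines {
        writeln!(encoder, "{}", line)?;
    }
    encoder.finish()?.sync_all()?;

    // 3. Atomically replace the original archive.
    fs::rename(&tmp_path, path)
}
```

Writing the temporary file into the same directory keeps the final rename on one filesystem, which is what makes the replacement atomic.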
This commit is contained in:
parent da0fed29de
commit 2ab1c31993

34 changed files with 4747 additions and 1099 deletions
4  .gitignore (vendored)

@@ -1,2 +1,6 @@
/target
.redo/

docs/demo/v1.json.archive
tests/compression-integration/data/
tests/compression-integration/out/
33  CLAUDE.md

@@ -1,6 +1,37 @@
## Running Tests

### Unit tests

```bash
cargo test                        # Run without compression support
cargo test --features compression # Run with compression support
```

### Integration tests

The compression integration tests verify appending to compressed archives (gzip, brotli, zstd):

```bash
cd tests/compression-integration
./run_all.sh         # Run all compression tests
./run_gzip_test.sh   # Run only gzip test
./run_brotli_test.sh # Run only brotli test
./run_zstd_test.sh   # Run only zstd test
```

These tests:
1. Create an uncompressed archive from the first state file
2. Compress it with the respective tool (gzip/brotli/zstd)
3. Append additional state files to the compressed archive
4. Verify the archive can be read and shows the correct observation count

Requirements: Python 3 (for test data generation), gzip, brotli, zstd command-line tools.

### Fuzz testing

To begin fuzzing, run: `cargo fuzz run <fuzz target name>`

The source code for a fuzz target by default lives in `fuzz/fuzz_targets/<fuzz target name>.rs`.

Each fuzz target is a Rust program that is given random data and tests a crate (in this case, json-archive). Use `cargo fuzz list` to view the list of all existing fuzz targets:
140  Cargo.lock (generated)

@@ -49,9 +49,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
|
|||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.9.4"
|
||||
version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
|
||||
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
|
|
@ -82,9 +82,9 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.39"
|
||||
version = "1.2.48"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f"
|
||||
checksum = "c481bdbf0ed3b892f6f806287d72acd515b352a4ec27a208489b8c1bc839633a"
|
||||
dependencies = [
|
||||
"find-msvc-tools",
|
||||
"jobserver",
|
||||
|
|
@ -94,9 +94,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.3"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
|
||||
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
|
|
@ -156,15 +156,15 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
|||
|
||||
[[package]]
|
||||
name = "find-msvc-tools"
|
||||
version = "0.1.2"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
|
||||
checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.2"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
|
||||
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
|
|
@ -172,14 +172,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.3"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
|
||||
checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"r-efi",
|
||||
"wasi",
|
||||
"wasip2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -224,9 +224,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.81"
|
||||
version = "0.3.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305"
|
||||
checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"wasm-bindgen",
|
||||
|
|
@ -250,9 +250,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.176"
|
||||
version = "0.2.177"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174"
|
||||
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
|
|
@ -279,6 +279,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -304,18 +305,18 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.101"
|
||||
version = "1.0.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
||||
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.40"
|
||||
version = "1.0.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
|
||||
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
|
@ -353,9 +354,9 @@ checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
|||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.227"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80ece43fc6fbed4eb5392ab50c07334d3e577cbf40997ee896fe7af40bba4245"
|
||||
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
|
||||
dependencies = [
|
||||
"serde_core",
|
||||
"serde_derive",
|
||||
|
|
@ -363,18 +364,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_core"
|
||||
version = "1.0.227"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a576275b607a2c86ea29e410193df32bc680303c82f31e275bbfcafe8b33be5"
|
||||
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.227"
|
||||
version = "1.0.228"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51e694923b8824cf0e9b382adf0f60d4e05f348f357b38833a3fa5ed7c2ede04"
|
||||
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -401,10 +402,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.106"
|
||||
name = "simd-adler32"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
|
||||
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.111"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -426,9 +433,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.19"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
|
||||
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
||||
|
||||
[[package]]
|
||||
name = "uuid"
|
||||
|
|
@ -442,15 +449,6 @@ dependencies = [
|
|||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.14.7+wasi-0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
|
||||
dependencies = [
|
||||
"wasip2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasip2"
|
||||
version = "1.0.1+wasi-0.2.4"
|
||||
|
|
@ -462,9 +460,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.104"
|
||||
version = "0.2.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d"
|
||||
checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
|
|
@ -473,25 +471,11 @@ dependencies = [
|
|||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.104"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.104"
|
||||
version = "0.2.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119"
|
||||
checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
|
|
@ -499,31 +483,31 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.104"
|
||||
version = "0.2.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
|
||||
checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.104"
|
||||
version = "0.2.106"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1"
|
||||
checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.62.1"
|
||||
version = "0.62.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9"
|
||||
checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
|
||||
dependencies = [
|
||||
"windows-implement",
|
||||
"windows-interface",
|
||||
|
|
@ -534,9 +518,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "windows-implement"
|
||||
version = "0.60.1"
|
||||
version = "0.60.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0"
|
||||
checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -545,9 +529,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "windows-interface"
|
||||
version = "0.59.2"
|
||||
version = "0.59.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5"
|
||||
checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -556,33 +540,33 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "windows-link"
|
||||
version = "0.2.0"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
|
||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.4.0"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f"
|
||||
checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-strings"
|
||||
version = "0.5.0"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda"
|
||||
checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.61.1"
|
||||
version = "0.61.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f"
|
||||
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
|
||||
dependencies = [
|
||||
"windows-link",
|
||||
]
|
||||
|
|
|
|||
57  README.md

@@ -2,11 +2,11 @@
A practical CLI tool for tracking JSON file changes over time. Instead of keeping multiple copies of JSON files, this creates compact delta-based archives that preserve the complete history.

## What it does
## Why I made this tool

This tool solves a simple problem: you have a JSON file that changes regularly, and you want to track its history without storing dozens of full copies.
The problem I am solving: I have a JSON file that changes regularly (output of a scraping pipeline), and I want to track its history without storing dozens of full copies.

`json-archive` creates a `.json.archive` file next to your original JSON file. Each time you run the tool, it calculates only what changed and appends those deltas to the archive. You get complete history with minimal storage overhead.
`json-archive` creates a `.json.archive` file next to your original JSON file. Each time you run the tool, it calculates only what changed and appends those deltas to the archive. You get complete history with minimal storage overhead. It can move a .json file into the archive or leave it untouched.

The archive format is human-readable JSONL (not binary), making it easy to inspect, debug, and pipe into other scripts or web visualizations.

@@ -18,9 +18,6 @@ json-archive data.json

# Later, append changes to existing archive
json-archive data.json.archive data.json

# Or let it infer again (won't overwrite without --force)
json-archive data.json  # Safe: won't overwrite existing data.json.archive
```

## Real-world use case

@@ -63,7 +60,7 @@ While the core design keeps things simple and readable, the tool does work with

This works fine for the happy path with archive files up to a few hundred megabytes, but contradicts the "keep it simple" design philosophy - it's included because it's practically useful.

**Building without compression**: Compression libraries are a security vulnerability vector. The default build includes them because most users want convenience. If you don't want to bundle compression libraries:
**Building without compression**: Compression libraries are a security vulnerability vector. The default build includes them because I want convenience. If you don't want to bundle compression libraries:

```bash
cargo install json-archive --no-default-features

@@ -157,38 +154,11 @@ cargo build --release

Archives use the `.json.archive` extension by default:

- `data.json` -> `data.json.archive`
- `video.info.json` -> `video.info.json.archive`
- `config.json` -> `config.json.archive`

This makes it immediately clear which files are archives and which are source files.

## Error handling

The tool uses descriptive diagnostics instead of cryptic error codes:

```
error: I couldn't find the input file: missing.json

  = help: Make sure the file path is correct and the file exists.
          Check for typos in the filename.
```

Diagnostics are categorized as Fatal, Warning, or Info, and the tool exits with non-zero status only for fatal errors.

## Performance characteristics

- **Memory usage**: Bounded by largest single JSON file, not archive size
- **Append speed**: Fast - only computes deltas, doesn't re-read entire archive
- **Read speed**: Linear scan, but snapshots allow seeking to recent state
- **File size**: Typically 10-30% the size of storing all JSON copies

For very large archives, consider using snapshots (`-s` flag) to enable faster seeking.
- `<filename>.json` -> `<filename>.json.archive`

## Browser compatibility

Archives can be loaded directly in web applications:
The strength of the file format is easy browser visualization:

```javascript
// Parse archive in browser

@@ -205,7 +175,8 @@ fetch('data.json.archive')
  });
```

The format uses only standard JSON. No special parsing required.
The format uses only standard JSON and organizes the data into roughly the shape
you would need anyway.

## Contributing

@@ -227,10 +198,10 @@ This project is licensed under the GNU Affero General Public License v3.0 (AGPL-
- You can use, modify, and distribute this software
- If you modify and distribute it, you must share your changes under the same license
- If you run a modified version on a server or embed it in a larger system, you must make the entire system's source code available to users
- No TiVoization - hardware restrictions that prevent users from running modified versions are prohibited
- No TiVoization! Hardware restrictions that prevent users from running
modified versions are prohibited. If you have a setup where you hard code a
signing key into firmware and refuse to run any user modified programs signed
by your secret key... then you are not allowed to use this software.

The AGPL ensures that improvements to this tool remain open and available to everyone, even when used in hosted services or embedded systems.

---

*Built with Rust for reliability and performance. Designed to be simple enough to understand, powerful enough to be useful.*
The AGPL ensures that improvements to this tool remain open and available to
everyone, even when used in hosted services or embedded systems.
@@ -5,5 +5,13 @@ case $1 in
addlicense -c "Peoples Grocers LLC" -f LICENSE-header -l "agpl-3.0" -s src/ >&2
;;

docs/diagnostics/json-pointer.md)
redo-ifchange src/bin/pointer_errors_demo.rs src/pointer.rs src/pointer_errors.rs
cargo run --quiet --bin pointer_errors_demo
;;

gen)
redo docs/diagnostics/json-pointer.md
;;

esac
141  docs/diagnostics/json-pointer.md (Normal file)

@@ -0,0 +1,141 @@
<!-- Generated by: cargo run --bin pointer_errors_demo > docs/diagnostics/json-pointer.md -->

# JSON Pointer Diagnostics

These are the error messages you'll see when a [JSON Pointer (RFC 6901)](https://datatracker.ietf.org/doc/html/rfc6901)
operation fails.
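For contrast, the stock JSON Pointer lookup in `serde_json` only reports that a path failed, not why. A minimal, illustrative example (this is not how the diagnostics below are produced; those come from `pointer_errors_demo`):

```rust
use serde_json::json;

fn main() {
    let doc = json!({"user": {"name": "Alice", "email": "alice@example.com", "age": 30}});

    // A typo in the path: serde_json's pointer() just returns None,
    // with no hint about which segment failed or which keys exist.
    assert_eq!(doc.pointer("/user/emial"), None);

    // The correct path resolves as expected.
    assert_eq!(doc.pointer("/user/email"), Some(&json!("alice@example.com")));
}
```

The diagnostics documented below exist to close that gap by underlining the failing segment and listing the keys that do exist.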
## Why These Errors Are Limited

The JSON object that failed to index probably doesn't exist anywhere as a file. It's
built by replaying delta events from the archive. The filename and line numbers in
these errors point to the source of the JSON pointer paths—the add/change/remove
events in the archive—not to the object itself.

A proper solution would dump the reconstructed JSON object to a file so you could
inspect it with `jq` or a text editor. That engineering work didn't happen.

Instead, you get:

- The pointer path that failed, with the failing segment underlined
- The actual value at the parent path (truncated)
- Some strings you can grep for in the archive

This is better than nothing, but it's still awkward. You can see *what* failed but
not easily inspect the full object we tried to index into. If you're lucky, the
truncated value shown is enough. If you're developing on this project, at least
you know what the errors look like.

## Contributing

If an error message is confusing or unhelpful for your case, please open an issue
or submit a pull request.

## Key Not Found

Key doesn't exist in the object. Shows available keys and suggests typos.

```
error E051: Path not found

I was traversing the JSON path '/user/emial' and got stuck.

I couldn't find the key 'emial'.

/user/emial
      ^^^^^

Value at '/user':
│ "age": ...
│ "email": ...
│ "name": ...

Available keys: age, email, name
Did you mean 'email'?
```

## Type Mismatch

Tried to index into a value that doesn't support it (e.g., `/domain` on a string,
`/0` on a number). Shows the actual type.

```
error E060: Type mismatch

I was traversing the JSON path '/users/0/email/domain' and got stuck.

I can't index into string with 'domain'.

/users/0/email/domain
               ^^^^^^

Value at '/users/0/email':
│ "alice@example.com"

Object keys like '/domain' only work on objects, not string.
```

## Array Index Out of Bounds

Index past the end of the array. Shows the array length.

```
error E051: Path not found

I was traversing the JSON path '/items/5' and got stuck.

I couldn't find index 5 (array length is 3).

/items/5
       ^

Value at '/items':
│ 0: "apple"
│ 1: "banana"
│ 2: "cherry"

Valid indices are 0-2.
```

## Array Index

If you think you have an object but you're actually indexing into an array, you'll see this error.

```
error E052: Invalid array index

I was traversing the JSON path '/items/foo' and got stuck.

I couldn't parse 'foo' as an array index.

/items/foo
       ^^^

Value at '/items':
│ 0: "apple"
│ 1: "banana"
│ 2: "cherry"

Array indices must be non-negative integers. Got 'foo'.
```

## Deep Path Failures

For long paths, the underline shows which segment failed. The full path remains
visible so you can see what you were trying to reach.

```
error E051: Path not found

I was traversing the JSON path '/data/users/0/profile/settings/theme' and got stuck.

I couldn't find the key 'settings'.

/data/users/0/profile/settings/theme
                      ^^^^^^^^

Value at '/data/users/0/profile':
│ "name": ...

Available keys: name
```
49  docs/fuzz-testing.md (Normal file)

@@ -0,0 +1,49 @@
# Fuzz Testing

Fuzz testing throws random inputs at your code until something breaks.

## Commands

List available fuzz targets:
```
cargo fuzz list
```

Run a fuzz target:
```
cargo fuzz run fuzz_apply_move
```

Runs until you kill it or it finds a crash.

## Reading the Output

```
#787958 REDUCE cov: 1281 ft: 6423 corp: 1112/621Kb lim: 4096 exec/s: 13823 rss: 584Mb L: 19/3954 MS: 1 EraseBytes-
#788755 REDUCE cov: 1281 ft: 6424 corp: 1113/621Kb lim: 4096 exec/s: 13837 rss: 584Mb L: 767/3954 MS: 2 CMP-CrossOver- DE: "6\000\000\000"-
#789383 REDUCE cov: 1281 ft: 6424 corp: 1113/621Kb lim: 4096 exec/s: 13848 rss: 584Mb L: 59/3954 MS: 3 InsertByte-ShuffleBytes-EraseBytes-
```

The fields:

- `#787958` — test case number. How many inputs have been tried.
- `REDUCE` — what happened. `NEW` means new code was reached. `REDUCE` means an input was shrunk while keeping the same coverage. `pulse` is just a heartbeat.
- `cov: 1281` — coverage. Number of code edges hit. This is what you care about.
- `ft: 6423` — features. Finer-grained coverage metric. Ignore it.
- `corp: 1112/621Kb` — corpus. 1112 interesting inputs saved, 621KB total.
- `exec/s: 13823` — speed. Test cases per second.
- `rss: 584Mb` — memory use.
- `L: 19/3954` — input length. This one was 19 bytes. Largest in corpus is 3954.
- `MS: 1 EraseBytes-` — mutation. How the input was generated. Doesn't matter.

## Is It Working?

Watch `cov`. If it goes up, the fuzzer is finding new code paths. If it stops going up, either you have good coverage or the fuzzer is stuck.

`exec/s` in the thousands is fine. If it drops to double digits, something is wrong.

Seeing `NEW` events means progress. Long stretches without `NEW` means diminishing returns.

## When to Stop

When `cov` stops increasing and you're bored. Hours for a quick check, days for thoroughness.
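As a companion to the commands above: a cargo-fuzz target is just a small `#![no_main]` Rust program built around the `fuzz_target!` macro. A minimal skeleton is sketched below for illustration; the real `fuzz_apply_move` target appears in full later in this diff.

```rust
// Minimal cargo-fuzz target skeleton (illustrative). The actual targets live
// in fuzz/fuzz_targets/ and are considerably more structured.
#![no_main]

use libfuzzer_sys::fuzz_target;

fuzz_target!(|data: &[u8]| {
    // Treat the random bytes as (possibly invalid) JSON text and make sure
    // parsing never panics; panics and crashes are what the fuzzer reports.
    if let Ok(text) = std::str::from_utf8(data) {
        let _ = serde_json::from_str::<serde_json::Value>(text);
    }
});
```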
116  fuzz/Cargo.lock (generated)
|
|
@ -2,6 +2,27 @@
|
|||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "alloc-no-stdlib"
|
||||
version = "2.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
|
||||
|
||||
[[package]]
|
||||
name = "alloc-stdlib"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "android_system_properties"
|
||||
version = "0.1.5"
|
||||
|
|
@ -32,6 +53,27 @@ version = "2.9.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
|
||||
|
||||
[[package]]
|
||||
name = "brotli"
|
||||
version = "8.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
"brotli-decompressor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "brotli-decompressor"
|
||||
version = "5.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
|
||||
dependencies = [
|
||||
"alloc-no-stdlib",
|
||||
"alloc-stdlib",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.19.0"
|
||||
|
|
@ -76,6 +118,15 @@ version = "0.8.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_arbitrary"
|
||||
version = "1.4.2"
|
||||
|
|
@ -109,6 +160,16 @@ version = "0.1.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.3.3"
|
||||
|
|
@ -173,13 +234,16 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "json-archive"
|
||||
version = "0.1.0"
|
||||
version = "0.99.0"
|
||||
dependencies = [
|
||||
"brotli",
|
||||
"chrono",
|
||||
"flate2",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"uuid",
|
||||
"xflags",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -227,6 +291,16 @@ version = "2.7.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
|
||||
dependencies = [
|
||||
"adler2",
|
||||
"simd-adler32",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.19"
|
||||
|
|
@ -242,6 +316,12 @@ version = "1.21.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.101"
|
||||
|
|
@ -340,6 +420,12 @@ version = "1.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "simd-adler32"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.106"
|
||||
|
|
@ -547,3 +633,31 @@ name = "xflags-macros"
|
|||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "672423d4fea7ffa2f6c25ba60031ea13dc6258070556f125cc4d790007d4a155"
|
||||
|
||||
[[package]]
|
||||
name = "zstd"
|
||||
version = "0.13.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
|
||||
dependencies = [
|
||||
"zstd-safe",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-safe"
|
||||
version = "7.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
|
||||
dependencies = [
|
||||
"zstd-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zstd-sys"
|
||||
version = "2.0.16+zstd.1.5.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"pkg-config",
|
||||
]
|
||||
|
|
|
@@ -36,3 +36,10 @@ path = "fuzz_targets/fuzz_mutations.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_apply_move"
path = "fuzz_targets/fuzz_apply_move.rs"
test = false
doc = false
bench = false
|
|||
186  fuzz/fuzz_targets/fuzz_apply_move.rs (Normal file)
|
|
@ -0,0 +1,186 @@
|
|||
#![no_main]
|
||||
|
||||
use arbitrary::{Arbitrary, Unstructured};
|
||||
use json_archive::apply_move;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
#[derive(Arbitrary, Debug)]
|
||||
struct FuzzMoveInput {
|
||||
structure: FuzzStructure,
|
||||
moves: Vec<(u8, u8)>,
|
||||
}
|
||||
|
||||
#[derive(Arbitrary, Debug)]
|
||||
enum FuzzStructure {
|
||||
// Direct array at root path
|
||||
RootArray(Vec<FuzzValue>),
|
||||
// Object with array field
|
||||
ObjectWithArray {
|
||||
field_name: String,
|
||||
array: Vec<FuzzValue>,
|
||||
},
|
||||
// Nested object with array
|
||||
NestedArray {
|
||||
outer_field: String,
|
||||
inner_field: String,
|
||||
array: Vec<FuzzValue>,
|
||||
},
|
||||
// Non-array value (should error)
|
||||
NonArray(FuzzValue),
|
||||
}
|
||||
|
||||
#[derive(Arbitrary, Debug, Clone)]
|
||||
enum FuzzValue {
|
||||
Null,
|
||||
Bool(bool),
|
||||
SmallInt(i8),
|
||||
String(String),
|
||||
// Limit recursion depth
|
||||
Array(Vec<SimpleValue>),
|
||||
Object(Vec<(String, SimpleValue)>),
|
||||
}
|
||||
|
||||
#[derive(Arbitrary, Debug, Clone)]
|
||||
enum SimpleValue {
|
||||
Null,
|
||||
Bool(bool),
|
||||
SmallInt(i8),
|
||||
String(String),
|
||||
}
|
||||
|
||||
impl SimpleValue {
|
||||
fn to_json(&self) -> Value {
|
||||
match self {
|
||||
SimpleValue::Null => Value::Null,
|
||||
SimpleValue::Bool(b) => Value::Bool(*b),
|
||||
SimpleValue::SmallInt(n) => json!(n),
|
||||
SimpleValue::String(s) => Value::String(s.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FuzzValue {
|
||||
fn to_json(&self) -> Value {
|
||||
match self {
|
||||
FuzzValue::Null => Value::Null,
|
||||
FuzzValue::Bool(b) => Value::Bool(*b),
|
||||
FuzzValue::SmallInt(n) => json!(n),
|
||||
FuzzValue::String(s) => Value::String(s.clone()),
|
||||
FuzzValue::Array(arr) => Value::Array(arr.iter().map(|v| v.to_json()).collect()),
|
||||
FuzzValue::Object(obj) => {
|
||||
let map: serde_json::Map<String, Value> =
|
||||
obj.iter().map(|(k, v)| (k.clone(), v.to_json())).collect();
|
||||
Value::Object(map)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FuzzStructure {
|
||||
fn to_json_and_path(&self) -> (Value, String) {
|
||||
match self {
|
||||
FuzzStructure::RootArray(arr) => {
|
||||
let json_arr = Value::Array(arr.iter().map(|v| v.to_json()).collect());
|
||||
(json!({"root": json_arr}), "/root".to_string())
|
||||
}
|
||||
FuzzStructure::ObjectWithArray { field_name, array } => {
|
||||
let json_arr = Value::Array(array.iter().map(|v| v.to_json()).collect());
|
||||
let path = format!("/{}", escape_json_pointer(field_name));
|
||||
(json!({ field_name.clone(): json_arr }), path)
|
||||
}
|
||||
FuzzStructure::NestedArray {
|
||||
outer_field,
|
||||
inner_field,
|
||||
array,
|
||||
} => {
|
||||
let json_arr = Value::Array(array.iter().map(|v| v.to_json()).collect());
|
||||
let path = format!(
|
||||
"/{}/{}",
|
||||
escape_json_pointer(outer_field),
|
||||
escape_json_pointer(inner_field)
|
||||
);
|
||||
(
|
||||
json!({ outer_field.clone(): { inner_field.clone(): json_arr } }),
|
||||
path,
|
||||
)
|
||||
}
|
||||
FuzzStructure::NonArray(val) => {
|
||||
(json!({"value": val.to_json()}), "/value".to_string())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn escape_json_pointer(s: &str) -> String {
|
||||
s.replace('~', "~0").replace('/', "~1")
|
||||
}
|
||||
|
||||
fuzz_target!(|data: &[u8]| {
|
||||
let mut u = Unstructured::new(data);
|
||||
if let Ok(input) = FuzzMoveInput::arbitrary(&mut u) {
|
||||
let (mut state, path) = input.structure.to_json_and_path();
|
||||
let original_state = state.clone();
|
||||
|
||||
// Get actual array from original state to compare against
|
||||
let original_array = get_array_at_path(&original_state, &path).cloned();
|
||||
|
||||
// Convert moves to usize
|
||||
let moves: Vec<(usize, usize)> = input
|
||||
.moves
|
||||
.iter()
|
||||
.map(|(from, to)| (*from as usize, *to as usize))
|
||||
.collect();
|
||||
|
||||
let result = apply_move(&mut state, &path, moves.clone());
|
||||
|
||||
match result {
|
||||
Ok(()) => {
|
||||
// If successful, verify invariants using actual arrays from JSON
|
||||
let new_array = get_array_at_path(&state, &path);
|
||||
|
||||
if let (Some(orig_arr), Some(new_arr)) = (&original_array, new_array) {
|
||||
// 1. Array length should be preserved
|
||||
assert_eq!(
|
||||
new_arr.len(),
|
||||
orig_arr.len(),
|
||||
"Array length changed after move: was {}, now {}",
|
||||
orig_arr.len(),
|
||||
new_arr.len()
|
||||
);
|
||||
|
||||
// 2. All original elements should still exist (as a multiset)
|
||||
let mut orig_sorted: Vec<_> =
|
||||
orig_arr.iter().map(|v| v.to_string()).collect();
|
||||
let mut new_sorted: Vec<_> = new_arr.iter().map(|v| v.to_string()).collect();
|
||||
orig_sorted.sort();
|
||||
new_sorted.sort();
|
||||
assert_eq!(
|
||||
orig_sorted, new_sorted,
|
||||
"Elements were lost or duplicated during move"
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(diag) => {
|
||||
// Error is expected for:
|
||||
// - Non-array targets
|
||||
// - Out of bounds indices
|
||||
// - Invalid paths
|
||||
// Just make sure we got a proper diagnostic
|
||||
assert!(!diag.description.is_empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
fn get_array_at_path<'a>(state: &'a Value, path: &str) -> Option<&'a Vec<Value>> {
|
||||
let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect();
|
||||
let mut current = state;
|
||||
|
||||
for part in parts {
|
||||
let unescaped = part.replace("~1", "/").replace("~0", "~");
|
||||
current = current.get(&unescaped)?;
|
||||
}
|
||||
|
||||
current.as_array()
|
||||
}
|
||||
640  src/archive.rs
|
|
@ -1,640 +0,0 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
use chrono::Utc;
|
||||
use serde_json::Value;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use crate::diff;
|
||||
use crate::events::{Event, Header, Observation};
|
||||
use crate::reader::{ArchiveReader, ReadMode};
|
||||
|
||||
pub struct ArchiveWriter {
|
||||
writer: BufWriter<File>,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
filename: String,
|
||||
}
|
||||
|
||||
impl ArchiveWriter {
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
path: P,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Result<Self, Vec<Diagnostic>> {
|
||||
let filename = path.as_ref().display().to_string();
|
||||
let file = match File::create(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
let diagnostic = Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create the output file: {}", e)
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure you have write permission in this directory and that the path is valid."
|
||||
.to_string()
|
||||
);
|
||||
return Err(vec![diagnostic]);
|
||||
}
|
||||
};
|
||||
let writer = BufWriter::new(file);
|
||||
|
||||
Ok(Self {
|
||||
writer,
|
||||
observation_count: 0,
|
||||
snapshot_interval,
|
||||
filename,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn new_append<P: AsRef<Path>>(
|
||||
path: P,
|
||||
snapshot_interval: Option<usize>,
|
||||
current_observation_count: usize,
|
||||
) -> Result<Self, Vec<Diagnostic>> {
|
||||
let filename = path.as_ref().display().to_string();
|
||||
let file = match OpenOptions::new().append(true).open(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
let diagnostic = Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file for appending: {}", e)
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure the archive file exists and you have write permission."
|
||||
.to_string()
|
||||
);
|
||||
return Err(vec![diagnostic]);
|
||||
}
|
||||
};
|
||||
let writer = BufWriter::new(file);
|
||||
|
||||
Ok(Self {
|
||||
writer,
|
||||
observation_count: current_observation_count,
|
||||
snapshot_interval,
|
||||
filename,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn write_header(&mut self, header: &Header) -> Result<(), Vec<Diagnostic>> {
|
||||
let header_json = match serde_json::to_string(header) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the header to JSON: {}", e),
|
||||
)
|
||||
.with_location(self.filename.clone(), 1)]);
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = writeln!(self.writer, "{}", header_json) {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output file: {}", e),
|
||||
)
|
||||
.with_location(self.filename.clone(), 1)]);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_comment(&mut self, comment: &str) -> Result<(), Vec<Diagnostic>> {
|
||||
if let Err(e) = writeln!(self.writer, "# {}", comment) {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output file: {}", e),
|
||||
)]);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_observation(&mut self, observation: Observation) -> Result<(), Vec<Diagnostic>> {
|
||||
let events = observation.to_events();
|
||||
|
||||
for event in events {
|
||||
let event_json = match serde_json::to_string(&event) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize an event to JSON: {}", e),
|
||||
)]);
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = writeln!(self.writer, "{}", event_json) {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output file: {}", e),
|
||||
)]);
|
||||
}
|
||||
}
|
||||
|
||||
self.observation_count += 1;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_snapshot(&mut self, object: &Value) -> Result<(), Vec<Diagnostic>> {
|
||||
let snapshot_id = format!("snapshot-{}", Uuid::new_v4());
|
||||
let snapshot = Event::Snapshot {
|
||||
observation_id: snapshot_id,
|
||||
timestamp: Utc::now(),
|
||||
object: object.clone(),
|
||||
};
|
||||
|
||||
let event_json = match serde_json::to_string(&snapshot) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the snapshot to JSON: {}", e),
|
||||
)]);
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = writeln!(self.writer, "{}", event_json) {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output file: {}", e),
|
||||
)]);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn should_write_snapshot(&self) -> bool {
|
||||
if let Some(interval) = self.snapshot_interval {
|
||||
self.observation_count > 0 && self.observation_count % interval == 0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn finish(mut self) -> Result<(), Vec<Diagnostic>> {
|
||||
if let Err(e) = self.writer.flush() {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush the output file: {}", e),
|
||||
)]);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ArchiveBuilder {
|
||||
initial_state: Option<Value>,
|
||||
current_state: Value,
|
||||
source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
}
|
||||
|
||||
impl ArchiveBuilder {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
initial_state: None,
|
||||
current_state: Value::Null,
|
||||
source: None,
|
||||
snapshot_interval: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_source(mut self, source: String) -> Self {
|
||||
self.source = Some(source);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_snapshot_interval(mut self, interval: usize) -> Self {
|
||||
self.snapshot_interval = Some(interval);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_state(&mut self, state: Value) -> Option<Observation> {
|
||||
if self.initial_state.is_none() {
|
||||
self.initial_state = Some(state.clone());
|
||||
self.current_state = state;
|
||||
return None;
|
||||
}
|
||||
|
||||
let observation_id = format!("obs-{}", Uuid::new_v4());
|
||||
let timestamp = Utc::now();
|
||||
|
||||
let diff_result: Vec<Event> = diff::diff(&self.current_state, &state, "", &observation_id);
|
||||
self.current_state = state;
|
||||
|
||||
let mut observation = Observation::new(observation_id, timestamp);
|
||||
for event in diff_result {
|
||||
observation.add_event(event);
|
||||
}
|
||||
|
||||
Some(observation)
|
||||
}
|
||||
|
||||
pub fn build<P: AsRef<Path>>(self, output_path: P) -> Result<(), Vec<Diagnostic>> {
|
||||
if self.initial_state.is_none() {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I can't build an archive without any initial state.".to_string(),
|
||||
)]);
|
||||
}
|
||||
|
||||
let header = Header::new(self.initial_state.unwrap(), self.source);
|
||||
|
||||
let mut writer = ArchiveWriter::new(output_path, self.snapshot_interval)?;
|
||||
writer.write_header(&header)?;
|
||||
writer.finish()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_initial_state(&self) -> Option<&Value> {
|
||||
self.initial_state.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate default output filename from input filename
|
||||
pub fn default_output_filename<P: AsRef<Path>>(input_path: P) -> PathBuf {
|
||||
let path = input_path.as_ref();
|
||||
let mut output = path.to_path_buf();
|
||||
|
||||
// If it already ends with .json.archive, don't modify it
|
||||
if let Some(filename) = path.file_name() {
|
||||
if let Some(filename_str) = filename.to_str() {
|
||||
if filename_str.ends_with(".json.archive") {
|
||||
return output;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add .json.archive extension
|
||||
if let Some(extension) = path.extension() {
|
||||
if extension == "json" {
|
||||
// Replace .json with .json.archive
|
||||
output.set_extension("json.archive");
|
||||
} else {
|
||||
// Append .json.archive to whatever extension exists
|
||||
let new_extension = format!("{}.json.archive", extension.to_string_lossy());
|
||||
output.set_extension(new_extension);
|
||||
}
|
||||
} else {
|
||||
// No extension, just add .json.archive
|
||||
output.set_extension("json.archive");
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
pub fn create_archive_from_files<P: AsRef<Path>>(
|
||||
input_files: &[P],
|
||||
output_path: P,
|
||||
source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Result<(), Vec<Diagnostic>> {
|
||||
let mut builder = ArchiveBuilder::new();
|
||||
if let Some(source) = source {
|
||||
builder = builder.with_source(source);
|
||||
}
|
||||
if let Some(interval) = snapshot_interval {
|
||||
builder = builder.with_snapshot_interval(interval);
|
||||
}
|
||||
|
||||
let first_content = std::fs::read_to_string(&input_files[0]).map_err(|e| {
|
||||
vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the first input file: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
let first_state: Value = serde_json::from_str(&first_content).map_err(|e| {
|
||||
vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse the first input file as JSON: {}", e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())]
|
||||
})?;
|
||||
|
||||
let _ = builder.add_state(first_state.clone());
|
||||
|
||||
let header = Header::new(first_state, builder.source.clone());
|
||||
let mut writer = ArchiveWriter::new(&output_path, builder.snapshot_interval)?;
|
||||
writer.write_header(&header)?;
|
||||
|
||||
for file_path in input_files[1..].iter() {
|
||||
writer.write_comment(&format!("Processing file: {:?}", file_path.as_ref()))?;
|
||||
|
||||
let content = std::fs::read_to_string(file_path).map_err(|e| {
|
||||
vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the input file: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
let state: Value = serde_json::from_str(&content).map_err(|e| {
|
||||
vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse the input file as JSON: {}", e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())]
|
||||
})?;
|
||||
|
||||
if let Some(observation) = builder.add_state(state.clone()) {
|
||||
writer.write_observation(observation)?;
|
||||
|
||||
if writer.should_write_snapshot() {
|
||||
writer.write_snapshot(&state)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.finish()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn append_to_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
// Read the existing archive to get the final state
|
||||
let reader = match ArchiveReader::new(&archive_path, ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive for reading: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let read_result = match reader.read(&archive_path) {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the archive: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Check for fatal diagnostics in the archive
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
let mut diagnostics = vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
"The existing archive contains fatal errors. Cannot append to a corrupt archive.".to_string(),
|
||||
)];
|
||||
diagnostics.extend(read_result.diagnostics.into_diagnostics());
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// If output path is different from archive path, copy the archive first
|
||||
if archive_path.as_ref() != output_path.as_ref() {
|
||||
if let Err(e) = std::fs::copy(&archive_path, &output_path) {
|
||||
return vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't copy the archive to the output location: {}", e),
|
||||
)];
|
||||
}
|
||||
}
|
||||
|
||||
// Create an append writer
|
||||
let mut writer = match ArchiveWriter::new_append(&output_path, snapshot_interval, read_result.observation_count) {
|
||||
Ok(w) => w,
|
||||
Err(diagnostics) => return diagnostics,
|
||||
};
|
||||
|
||||
// Create a builder to track state changes
|
||||
let mut builder = ArchiveBuilder::new();
|
||||
if let Some(source) = source {
|
||||
builder = builder.with_source(source);
|
||||
}
|
||||
if let Some(interval) = snapshot_interval {
|
||||
builder = builder.with_snapshot_interval(interval);
|
||||
}
|
||||
|
||||
// Initialize builder with the final state from the archive
|
||||
let current_state = read_result.final_state;
|
||||
builder.current_state = current_state.clone();
|
||||
builder.initial_state = Some(current_state.clone());
|
||||
|
||||
// Process each new file
|
||||
for file_path in new_files.iter() {
|
||||
if let Err(diagnostics) = writer.write_comment(&format!("Processing file: {:?}", file_path.as_ref())) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
let content = match std::fs::read_to_string(file_path) {
|
||||
Ok(content) => content,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the input file: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let state: Value = match serde_json::from_str(&content) {
|
||||
Ok(state) => state,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse the input file as JSON: {}", e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())];
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(observation) = builder.add_state(state.clone()) {
|
||||
if let Err(diagnostics) = writer.write_observation(observation) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
if writer.should_write_snapshot() {
|
||||
if let Err(diagnostics) = writer.write_snapshot(&state) {
|
||||
return diagnostics;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finish writing
|
||||
match writer.finish() {
|
||||
Ok(()) => Vec::new(),
|
||||
Err(diagnostics) => diagnostics,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_archive_writer_header() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_file = NamedTempFile::new()?;
|
||||
let header = Header::new(json!({"test": "value"}), Some("test-source".to_string()));
|
||||
|
||||
{
|
||||
let mut writer = ArchiveWriter::new(temp_file.path(), None)
|
||||
.map_err(|_| "Failed to create writer")?;
|
||||
writer
|
||||
.write_header(&header)
|
||||
.map_err(|_| "Failed to write header")?;
|
||||
writer.finish().map_err(|_| "Failed to finish")?;
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(temp_file.path())?;
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
assert_eq!(lines.len(), 1);
|
||||
|
||||
let parsed_header: Header = serde_json::from_str(lines[0])?;
|
||||
assert_eq!(parsed_header.file_type, "@peoplesgrocers/json-archive");
|
||||
assert_eq!(parsed_header.version, 1);
|
||||
assert_eq!(parsed_header.initial, json!({"test": "value"}));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_archive_builder() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut builder = ArchiveBuilder::new();
|
||||
|
||||
// First state becomes initial
|
||||
let result = builder.add_state(json!({"count": 0}));
|
||||
assert!(result.is_none());
|
||||
|
||||
// Second state generates observation
|
||||
let observation = builder
|
||||
.add_state(json!({"count": 1}))
|
||||
.expect("Should generate observation");
|
||||
assert!(!observation.events.is_empty());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_archive_from_files() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create temporary input files
|
||||
let mut file1 = NamedTempFile::new()?;
|
||||
let mut file2 = NamedTempFile::new()?;
|
||||
let output_file = NamedTempFile::new()?;
|
||||
|
||||
writeln!(file1, r#"{{"count": 0, "name": "test"}}"#)?;
|
||||
writeln!(file2, r#"{{"count": 1, "name": "test"}}"#)?;
|
||||
|
||||
let input_files = vec![file1.path(), file2.path()];
|
||||
|
||||
create_archive_from_files(
|
||||
&input_files,
|
||||
output_file.path(),
|
||||
Some("test-source".to_string()),
|
||||
None,
|
||||
)
|
||||
.map_err(|_| "Failed to create archive")?;
|
||||
|
||||
let content = std::fs::read_to_string(output_file.path())?;
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
|
||||
assert!(lines.len() >= 2); // At least header + comment + observe + change events
|
||||
|
||||
// First line should be header
|
||||
let header: Header = serde_json::from_str(lines[0])?;
|
||||
assert_eq!(header.file_type, "@peoplesgrocers/json-archive");
|
||||
assert_eq!(header.version, 1);
|
||||
assert_eq!(header.initial, json!({"count": 0, "name": "test"}));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_snapshot_interval() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_file = NamedTempFile::new()?;
|
||||
let mut writer =
|
||||
ArchiveWriter::new(temp_file.path(), Some(2)).map_err(|_| "Failed to create writer")?;
|
||||
|
||||
assert!(!writer.should_write_snapshot()); // No observations yet
|
||||
|
||||
let obs1 = Observation::new("obs-1".to_string(), Utc::now());
|
||||
writer
|
||||
.write_observation(obs1)
|
||||
.map_err(|_| "Failed to write observation")?;
|
||||
assert!(!writer.should_write_snapshot()); // 1 observation, interval is 2
|
||||
|
||||
let obs2 = Observation::new("obs-2".to_string(), Utc::now());
|
||||
writer
|
||||
.write_observation(obs2)
|
||||
.map_err(|_| "Failed to write observation")?;
|
||||
assert!(writer.should_write_snapshot()); // 2 observations, should snapshot
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default_output_filename() {
|
||||
assert_eq!(
|
||||
default_output_filename("test.json"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test.txt"),
|
||||
PathBuf::from("test.txt.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test.json.archive"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
}
|
||||
}
|
||||
595 src/archive_context.rs Normal file
@ -0,0 +1,595 @@
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Archive write context and shared observation writing logic.
//!
//! This module provides:
//! - `WriteContext`: A struct that holds the state needed to write observations
//! - `write_observations`: The shared logic for diffing JSON files and writing events
//!
//! The key insight is that both create and append operations share the same
//! core logic once they've set up their initial state and writer.

use chrono::{DateTime, Utc};
use serde_json::Value;
use std::io::Write;
use std::path::{Path, PathBuf};
use uuid::Uuid;

use crate::atomic_file::atomic_replace_file;
use crate::detection::CompressionFormat;
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector};
use crate::diff;
use crate::events::{Event, Observation};

/// Strategy for finishing the write operation.
#[derive(Debug, Clone)]
pub enum FinishStrategy {
    /// Just flush the writer. Used for:
    /// - Creating new archives
    /// - Appending to uncompressed archives (same file)
    FlushOnly,

    /// Atomic replace: swap temp file with original. Used for:
    /// - Appending to compressed archives (rewrite strategy)
    AtomicReplace {
        temp_path: PathBuf,
        output_path: PathBuf,
    },
}
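// Illustrative sketch of how a caller picks a strategy. The names are taken from
// this crate (CompressionFormat from crate::detection, generate_temp_filename from
// crate::atomic_file); the real call sites live in archive_ops.rs:
//
//     let strategy = if format == CompressionFormat::None {
//         FinishStrategy::FlushOnly
//     } else {
//         // compressed archives are rewritten to a temp file and swapped in
//         FinishStrategy::AtomicReplace {
//             temp_path: generate_temp_filename(output_path),
//             output_path: output_path.to_path_buf(),
//         }
//     };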

/// Context for writing observations to an archive.
///
/// This struct is the result of the "setup phase" for both create and append
/// operations. Once you have a WriteContext, you can use `write_observations`
/// to add new states, then call `finish` to complete the operation.
pub struct WriteContext<W: Write> {
    /// The writer to output JSON lines to.
    pub writer: W,

    /// Current state of the archive (used for diffing).
    pub current_state: Value,

    /// Number of observations already in the archive.
    pub observation_count: usize,

    /// Optional interval for writing snapshots.
    pub snapshot_interval: Option<usize>,

    /// How to finish the write operation.
    pub finish_strategy: FinishStrategy,

    /// Diagnostics collected during setup (e.g., warnings from reading existing archive).
    pub diagnostics: DiagnosticCollector,
}
|
||||
|
||||
impl<W: Write> WriteContext<W> {
|
||||
/// Create a new write context.
|
||||
pub fn new(
|
||||
writer: W,
|
||||
current_state: Value,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
finish_strategy: FinishStrategy,
|
||||
) -> Self {
|
||||
Self {
|
||||
writer,
|
||||
current_state,
|
||||
observation_count,
|
||||
snapshot_interval,
|
||||
finish_strategy,
|
||||
diagnostics: DiagnosticCollector::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a write context with existing diagnostics.
|
||||
pub fn with_diagnostics(
|
||||
writer: W,
|
||||
current_state: Value,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
finish_strategy: FinishStrategy,
|
||||
diagnostics: DiagnosticCollector,
|
||||
) -> Self {
|
||||
Self {
|
||||
writer,
|
||||
current_state,
|
||||
observation_count,
|
||||
snapshot_interval,
|
||||
finish_strategy,
|
||||
diagnostics,
|
||||
}
|
||||
}
|
||||
|
||||
    /// Write observations for a list of JSON files.
    ///
    /// For each file:
    /// 1. Reads and parses the JSON
    /// 2. Diffs against current state
    /// 3. Writes observation events
    /// 4. Optionally writes a snapshot if interval is reached
    /// 5. Updates current state
    ///
    /// Returns the number of observations written.
|
||||
pub fn write_observations<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
files: &[P],
|
||||
) -> Result<usize, Vec<Diagnostic>> {
|
||||
let mut observations_written = 0;
|
||||
|
||||
for file_path in files.iter() {
|
||||
let file_path = file_path.as_ref();
|
||||
|
||||
// Write comment marking which file we're processing
|
||||
if let Err(e) = writeln!(self.writer, "# Processing file: {}", file_path.display()) {
|
||||
return Err(vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]);
|
||||
}
|
||||
|
||||
// Get file modification time for the observation timestamp
|
||||
let file_mtime = get_file_mtime(file_path)?;
|
||||
|
||||
// Read and parse new state
|
||||
let content = std::fs::read_to_string(file_path).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the input file '{}': {}", file_path.display(), e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
let new_state: Value = serde_json::from_str(&content).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse '{}' as JSON: {}", file_path.display(), e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())]
|
||||
})?;
|
||||
|
||||
// Generate diff and create observation
|
||||
let observation_id = format!("obs-{}", Uuid::new_v4());
|
||||
let diff_events = diff::diff(&self.current_state, &new_state, "", &observation_id);
|
||||
|
||||
// Skip if no changes
|
||||
if diff_events.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create and write observation
|
||||
let mut observation = Observation::new(observation_id, file_mtime);
|
||||
for event in diff_events {
|
||||
observation.add_event(event);
|
||||
}
|
||||
|
||||
self.write_observation(observation)?;
|
||||
observations_written += 1;
|
||||
self.observation_count += 1;
|
||||
|
||||
// Check if we should write a snapshot
|
||||
if self.should_write_snapshot() {
|
||||
self.write_snapshot(&new_state, file_mtime)?;
|
||||
}
|
||||
|
||||
// Update current state for next iteration
|
||||
self.current_state = new_state;
|
||||
}
|
||||
|
||||
Ok(observations_written)
|
||||
}
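    // A minimal sketch of driving this API end to end (assumptions: an in-memory
    // Vec<u8> sink, serde_json's json! macro in scope, and a made-up file name;
    // real callers pass a File or a CompressedWriter):
    //
    //     let mut out: Vec<u8> = Vec::new();
    //     let mut ctx = WriteContext::new(&mut out, json!({"count": 0}), 0, Some(10), FinishStrategy::FlushOnly);
    //     let written = ctx.write_observations(&["new_state.json"])?;
    //     let diagnostics = ctx.finish()?;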
|
||||
|
||||
/// Write a single observation's events to the output.
|
||||
fn write_observation(&mut self, observation: Observation) -> Result<(), Vec<Diagnostic>> {
|
||||
for event in observation.to_events() {
|
||||
let event_json = serde_json::to_string(&event).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize an event to JSON: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
writeln!(self.writer, "{}", event_json).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if we should write a snapshot based on observation count.
|
||||
fn should_write_snapshot(&self) -> bool {
|
||||
if let Some(interval) = self.snapshot_interval {
|
||||
self.observation_count > 0 && self.observation_count % interval == 0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
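    // Worked example: with snapshot_interval = Some(3), snapshots land after
    // observations 3, 6, 9, ... (count 6 -> 6 % 3 == 0 -> snapshot; count 7 -> no
    // snapshot). With no interval configured, this never returns true.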
|
||||
|
||||
/// Write a snapshot event.
|
||||
fn write_snapshot(&mut self, state: &Value, timestamp: DateTime<Utc>) -> Result<(), Vec<Diagnostic>> {
|
||||
let snapshot_id = format!("snapshot-{}", Uuid::new_v4());
|
||||
let snapshot = Event::Snapshot {
|
||||
observation_id: snapshot_id,
|
||||
timestamp,
|
||||
object: state.clone(),
|
||||
};
|
||||
|
||||
let snapshot_json = serde_json::to_string(&snapshot).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the snapshot to JSON: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
writeln!(self.writer, "{}", snapshot_json).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Finish the write operation.
|
||||
///
|
||||
/// This flushes the writer and, for compressed append operations,
|
||||
/// performs the atomic file replacement.
|
||||
pub fn finish(mut self) -> Result<DiagnosticCollector, Vec<Diagnostic>> {
|
||||
// Flush the writer
|
||||
self.writer.flush().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush the output file: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
// Handle atomic replacement if needed
|
||||
match self.finish_strategy {
|
||||
FinishStrategy::FlushOnly => {
|
||||
// Nothing more to do
|
||||
}
|
||||
FinishStrategy::AtomicReplace { temp_path, output_path } => {
|
||||
atomic_replace_file(&output_path, &temp_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self.diagnostics)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the file modification time as a DateTime<Utc>.
|
||||
fn get_file_mtime<P: AsRef<Path>>(path: P) -> Result<DateTime<Utc>, Vec<Diagnostic>> {
|
||||
let path = path.as_ref();
|
||||
let metadata = std::fs::metadata(path).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't get metadata for '{}': {}", path.display(), e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
let modified = metadata.modified().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't get modification time for '{}': {}", path.display(), e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
Ok(modified.into())
|
||||
}
|
||||
|
||||
/// Encoder wrapper that provides a uniform interface for different compression formats.
|
||||
///
|
||||
/// This enum wraps the various compression encoders so we can treat them uniformly
|
||||
/// in the append-to-compressed-archive flow.
|
||||
#[cfg(feature = "compression")]
|
||||
pub enum CompressedWriter {
|
||||
Gzip(flate2::write::GzEncoder<std::fs::File>),
|
||||
Zlib(flate2::write::ZlibEncoder<std::fs::File>),
|
||||
Zstd(zstd::stream::write::Encoder<'static, std::fs::File>),
|
||||
Brotli(brotli::CompressorWriter<std::fs::File>),
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
impl Write for CompressedWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
match self {
|
||||
CompressedWriter::Gzip(w) => w.write(buf),
|
||||
CompressedWriter::Zlib(w) => w.write(buf),
|
||||
CompressedWriter::Zstd(w) => w.write(buf),
|
||||
CompressedWriter::Brotli(w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
match self {
|
||||
CompressedWriter::Gzip(w) => w.flush(),
|
||||
CompressedWriter::Zlib(w) => w.flush(),
|
||||
CompressedWriter::Zstd(w) => w.flush(),
|
||||
CompressedWriter::Brotli(w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
impl CompressedWriter {
|
||||
/// Create a new compressed writer for the given format and file.
|
||||
pub fn new(format: CompressionFormat, file: std::fs::File) -> Result<Self, Diagnostic> {
|
||||
use flate2::Compression;
|
||||
|
||||
match format {
|
||||
CompressionFormat::Gzip => {
|
||||
Ok(CompressedWriter::Gzip(flate2::write::GzEncoder::new(file, Compression::default())))
|
||||
}
|
||||
CompressionFormat::Zlib => {
|
||||
Ok(CompressedWriter::Zlib(flate2::write::ZlibEncoder::new(file, Compression::default())))
|
||||
}
|
||||
CompressionFormat::Zstd => {
|
||||
let encoder = zstd::stream::write::Encoder::new(file, 0).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create zstd encoder: {}", e),
|
||||
)
|
||||
})?;
|
||||
Ok(CompressedWriter::Zstd(encoder))
|
||||
}
|
||||
CompressionFormat::Brotli => {
|
||||
Ok(CompressedWriter::Brotli(brotli::CompressorWriter::new(file, 4096, 11, 22)))
|
||||
}
|
||||
CompressionFormat::Deflate => {
|
||||
// Deflate is typically used within gzip/zlib, not standalone for files
|
||||
Err(Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
"Standalone deflate compression is not supported for writing.".to_string(),
|
||||
))
|
||||
}
|
||||
CompressionFormat::None => {
|
||||
Err(Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
"CompressedWriter::new called with CompressionFormat::None".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish compression and return any errors.
|
||||
///
|
||||
/// This must be called before the file is closed to ensure all
|
||||
/// compressed data is flushed.
|
||||
pub fn finish(self) -> Result<(), Diagnostic> {
|
||||
match self {
|
||||
CompressedWriter::Gzip(w) => {
|
||||
w.finish().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't finish gzip compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
CompressedWriter::Zlib(w) => {
|
||||
w.finish().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't finish zlib compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
CompressedWriter::Zstd(w) => {
|
||||
w.finish().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't finish zstd compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
CompressedWriter::Brotli(mut w) => {
|
||||
// Brotli doesn't have a finish() method, flush is sufficient
|
||||
w.flush().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush brotli compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
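// Sketch of the compressed-append flow this wrapper supports (the full flow,
// including the atomic rename, lives in archive_ops::append_to_compressed_archive;
// the variable names here are illustrative only):
//
//     let temp = std::fs::File::create(&temp_path)?;
//     let mut w = CompressedWriter::new(CompressionFormat::Gzip, temp)?;
//     w.write_all(&decompressed_bytes)?; // replay the existing archive verbatim
//     // ...append the new observation lines through a WriteContext...
//     w.finish()?;                       // finalize the encoder before the swap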
|
||||
|
||||
/// A write context specifically for compressed output.
|
||||
///
|
||||
/// This wraps WriteContext to handle the finish() call properly for
|
||||
/// compressed writers, which need to call finish() on the encoder
|
||||
/// before the atomic file swap.
|
||||
#[cfg(feature = "compression")]
|
||||
pub struct CompressedWriteContext {
|
||||
/// The inner write context.
|
||||
inner: WriteContext<CompressedWriter>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
impl CompressedWriteContext {
|
||||
/// Create a new compressed write context.
|
||||
pub fn new(
|
||||
writer: CompressedWriter,
|
||||
current_state: Value,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
finish_strategy: FinishStrategy,
|
||||
diagnostics: DiagnosticCollector,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: WriteContext::with_diagnostics(
|
||||
writer,
|
||||
current_state,
|
||||
observation_count,
|
||||
snapshot_interval,
|
||||
finish_strategy,
|
||||
diagnostics,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write observations for a list of JSON files.
|
||||
pub fn write_observations<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
files: &[P],
|
||||
) -> Result<usize, Vec<Diagnostic>> {
|
||||
self.inner.write_observations(files)
|
||||
}
|
||||
|
||||
/// Write raw bytes to the output (used for copying existing archive content).
|
||||
pub fn write_raw(&mut self, bytes: &[u8]) -> Result<(), Vec<Diagnostic>> {
|
||||
self.inner.writer.write_all(bytes).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]
|
||||
})
|
||||
}
|
||||
|
||||
/// Finish the write operation.
|
||||
///
|
||||
/// This finishes the compression encoder, then performs any atomic
|
||||
/// file operations needed.
|
||||
pub fn finish(self) -> Result<DiagnosticCollector, Vec<Diagnostic>> {
|
||||
let finish_strategy = self.inner.finish_strategy.clone();
|
||||
let diagnostics = self.inner.diagnostics;
|
||||
|
||||
// Finish compression first
|
||||
self.inner.writer.finish().map_err(|d| vec![d])?;
|
||||
|
||||
// Then handle atomic replacement if needed
|
||||
match finish_strategy {
|
||||
FinishStrategy::FlushOnly => {
|
||||
// Nothing more to do
|
||||
}
|
||||
FinishStrategy::AtomicReplace { temp_path, output_path } => {
|
||||
atomic_replace_file(&output_path, &temp_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(diagnostics)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
fn test_write_context_single_observation() {
|
||||
let mut output = Vec::new();
|
||||
let initial_state = json!({"count": 0});
|
||||
|
||||
{
|
||||
let mut ctx = WriteContext::new(
|
||||
&mut output,
|
||||
initial_state,
|
||||
0,
|
||||
None,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
|
||||
// Create a temp file with new state
|
||||
let mut temp_file = tempfile::NamedTempFile::new().unwrap();
|
||||
std::io::Write::write_all(&mut temp_file, br#"{"count": 1}"#).unwrap();
|
||||
temp_file.flush().unwrap();
|
||||
|
||||
let count = ctx.write_observations(&[temp_file.path()]).unwrap();
|
||||
assert_eq!(count, 1);
|
||||
}
|
||||
|
||||
let output_str = String::from_utf8(output).unwrap();
|
||||
assert!(output_str.contains("# Processing file:"));
|
||||
assert!(output_str.contains("observe"));
|
||||
assert!(output_str.contains("change"));
|
||||
assert!(output_str.contains("/count"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_write_context_no_changes() {
|
||||
let mut output = Vec::new();
|
||||
let initial_state = json!({"count": 0});
|
||||
|
||||
{
|
||||
let mut ctx = WriteContext::new(
|
||||
&mut output,
|
||||
initial_state,
|
||||
0,
|
||||
None,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
|
||||
// Create a temp file with same state
|
||||
let mut temp_file = tempfile::NamedTempFile::new().unwrap();
|
||||
std::io::Write::write_all(&mut temp_file, br#"{"count": 0}"#).unwrap();
|
||||
temp_file.flush().unwrap();
|
||||
|
||||
let count = ctx.write_observations(&[temp_file.path()]).unwrap();
|
||||
assert_eq!(count, 0);
|
||||
}
|
||||
|
||||
let output_str = String::from_utf8(output).unwrap();
|
||||
// Should have comment but no events
|
||||
assert!(output_str.contains("# Processing file:"));
|
||||
assert!(!output_str.contains("observe"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_write_snapshot() {
|
||||
let output: Vec<u8> = Vec::new();
|
||||
|
||||
// No interval set
|
||||
let ctx: WriteContext<Vec<u8>> = WriteContext::new(
|
||||
output.clone(),
|
||||
json!({}),
|
||||
5,
|
||||
None,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
assert!(!ctx.should_write_snapshot());
|
||||
|
||||
// Interval of 2, at observation 4 (multiple of 2)
|
||||
let ctx: WriteContext<Vec<u8>> = WriteContext::new(
|
||||
output.clone(),
|
||||
json!({}),
|
||||
4,
|
||||
Some(2),
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
assert!(ctx.should_write_snapshot());
|
||||
|
||||
// Interval of 2, at observation 3 (not multiple of 2)
|
||||
let ctx: WriteContext<Vec<u8>> = WriteContext::new(
|
||||
output,
|
||||
json!({}),
|
||||
3,
|
||||
Some(2),
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
assert!(!ctx.should_write_snapshot());
|
||||
}
|
||||
}
233 src/archive_open.rs Normal file
@ -0,0 +1,233 @@
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Unified archive file opening with compression detection.
//!
//! This module provides a single entry point for opening archive files that:
//! - Detects compression format from magic bytes
//! - Creates the appropriate decompressor
//! - Returns a unified `BufRead` interface
//!
//! This eliminates duplicated compression detection logic across the codebase.

use std::fs::File;
use std::io::{BufRead, BufReader, Read};
use std::path::Path;

use crate::detection::{detect_compression_format, CompressionFormat};
use crate::diagnostics::{Diagnostic, DiagnosticCode};

#[cfg(feature = "compression")]
use brotli::Decompressor;
#[cfg(feature = "compression")]
use flate2::read::{DeflateDecoder, GzDecoder, ZlibDecoder};
#[cfg(feature = "compression")]
use zstd::stream::read::Decoder as ZstdDecoder;

/// Result of opening an archive file for reading.
pub struct OpenedArchive {
    /// Buffered reader that handles decompression transparently.
    pub reader: Box<dyn BufRead>,
    /// The detected compression format.
    pub format: CompressionFormat,
}
|
||||
|
||||
/// Opens an archive file and returns a buffered reader that handles decompression.
|
||||
///
|
||||
/// This function:
|
||||
/// 1. Opens the file
|
||||
/// 2. Reads magic bytes to detect compression
|
||||
/// 3. Reopens and wraps with appropriate decompressor
|
||||
/// 4. Returns a unified `BufRead` interface
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `path` - Path to the archive file
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns `OpenedArchive` containing the reader and detected format,
|
||||
/// or a diagnostic if the file couldn't be opened.
|
||||
///
|
||||
/// # Feature flags
///
/// When built without the `compression` feature, compressed files are still
/// detected, but `open_archive` returns the raw (still-compressed) bytes.
/// Callers should inspect `format` (for example via `check_compression_support`)
/// and report an error instead of trying to parse the compressed stream.
|
||||
pub fn open_archive<P: AsRef<Path>>(path: P) -> Result<OpenedArchive, Diagnostic> {
|
||||
let path = path.as_ref();
|
||||
let filename = path.display().to_string();
|
||||
|
||||
// Open file and read magic bytes
|
||||
let mut file = File::open(path).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file: {}", e),
|
||||
)
|
||||
.with_location(filename.clone(), 1)
|
||||
})?;
|
||||
|
||||
let mut magic_bytes = [0u8; 4];
|
||||
let bytes_read = file.read(&mut magic_bytes).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read from the archive file: {}", e),
|
||||
)
|
||||
.with_location(filename.clone(), 1)
|
||||
})?;
|
||||
|
||||
let format = detect_compression_format(path, &magic_bytes[..bytes_read]);
|
||||
|
||||
// Reopen file to reset position
|
||||
let file = File::open(path).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't reopen the archive file: {}", e),
|
||||
)
|
||||
.with_location(filename.clone(), 1)
|
||||
})?;
|
||||
|
||||
// Create appropriate reader based on compression format
|
||||
#[cfg(feature = "compression")]
|
||||
let reader: Box<dyn BufRead> = match format {
|
||||
CompressionFormat::Gzip => Box::new(BufReader::new(GzDecoder::new(file))),
|
||||
CompressionFormat::Deflate => Box::new(BufReader::new(DeflateDecoder::new(file))),
|
||||
CompressionFormat::Zlib => Box::new(BufReader::new(ZlibDecoder::new(file))),
|
||||
CompressionFormat::Brotli => Box::new(BufReader::new(Decompressor::new(file, 4096))),
|
||||
CompressionFormat::Zstd => {
|
||||
let decoder = ZstdDecoder::new(file).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create zstd decoder: {}", e),
|
||||
)
|
||||
.with_location(filename.clone(), 1)
|
||||
})?;
|
||||
Box::new(BufReader::new(decoder))
|
||||
}
|
||||
CompressionFormat::None => Box::new(BufReader::new(file)),
|
||||
};
|
||||
|
||||
#[cfg(not(feature = "compression"))]
|
||||
let reader: Box<dyn BufRead> = Box::new(BufReader::new(file));
|
||||
|
||||
Ok(OpenedArchive { reader, format })
|
||||
}
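// Example use (a hedged sketch; error handling elided and the path is made up):
// the caller gets line-based access regardless of whether the archive on disk
// is compressed.
//
//     let opened = open_archive("data/state.json.archive.gz")?;
//     check_compression_support(opened.format, "data/state.json.archive.gz")?;
//     for line in opened.reader.lines() {
//         let line = line?;
//         // each line is one JSON event...
//     }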
|
||||
|
||||
/// Checks if the detected compression format is supported by this build.
|
||||
///
|
||||
/// Returns a diagnostic error if compression was detected but the binary
|
||||
/// was built without compression support.
|
||||
#[cfg_attr(feature = "compression", allow(unused_variables))]
|
||||
pub fn check_compression_support(
|
||||
format: CompressionFormat,
|
||||
filename: &str,
|
||||
) -> Result<(), Diagnostic> {
|
||||
#[cfg(not(feature = "compression"))]
|
||||
if format != CompressionFormat::None {
|
||||
let format_name = match format {
|
||||
CompressionFormat::Gzip => "gzip",
|
||||
CompressionFormat::Deflate => "deflate",
|
||||
CompressionFormat::Zlib => "zlib",
|
||||
CompressionFormat::Brotli => "brotli",
|
||||
CompressionFormat::Zstd => "zstd",
|
||||
CompressionFormat::None => unreachable!(),
|
||||
};
|
||||
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
format!(
|
||||
"I detected a {}-compressed archive, but this build doesn't support compression.",
|
||||
format_name
|
||||
),
|
||||
)
|
||||
.with_location(filename.to_string(), 1)
|
||||
.with_advice(
|
||||
"This binary was built without compression support to reduce binary size and dependencies.\n\
|
||||
You have two options:\n\
|
||||
1. Install the version with compression support: cargo install json-archive --features compression\n\
|
||||
2. Manually decompress the file first, then use this tool on the uncompressed archive"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Convenience function to check if a file is compressed.
|
||||
///
|
||||
/// This opens the file, reads magic bytes, and returns the compression format.
|
||||
/// Useful when you need to know the format before deciding how to process the file.
|
||||
pub fn detect_archive_compression<P: AsRef<Path>>(path: P) -> Result<CompressionFormat, Diagnostic> {
|
||||
let path = path.as_ref();
|
||||
let filename = path.display().to_string();
|
||||
|
||||
let mut file = File::open(path).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the file to check compression: {}", e),
|
||||
)
|
||||
.with_location(filename.clone(), 1)
|
||||
})?;
|
||||
|
||||
let mut magic_bytes = [0u8; 4];
|
||||
let bytes_read = file.read(&mut magic_bytes).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read from the file: {}", e),
|
||||
)
|
||||
.with_location(filename, 1)
|
||||
})?;
|
||||
|
||||
Ok(detect_compression_format(path, &magic_bytes[..bytes_read]))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_open_uncompressed_archive() {
|
||||
let mut temp_file = NamedTempFile::new().unwrap();
|
||||
writeln!(temp_file, r#"{{"type":"@peoplesgrocers/json-archive","version":1}}"#).unwrap();
|
||||
temp_file.flush().unwrap();
|
||||
|
||||
let opened = open_archive(temp_file.path()).unwrap();
|
||||
assert_eq!(opened.format, CompressionFormat::None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_archive_compression_uncompressed() {
|
||||
let mut temp_file = NamedTempFile::new().unwrap();
|
||||
writeln!(temp_file, "plain text content").unwrap();
|
||||
temp_file.flush().unwrap();
|
||||
|
||||
let format = detect_archive_compression(temp_file.path()).unwrap();
|
||||
assert_eq!(format, CompressionFormat::None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_open_nonexistent_file() {
|
||||
let result = open_archive("/nonexistent/path/to/file.json.archive");
|
||||
assert!(result.is_err());
|
||||
}
|
||||
}
644 src/archive_ops.rs Normal file
@ -0,0 +1,644 @@
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! High-level archive operations: create and append.
//!
//! This module provides the top-level entry points for creating and appending
//! to archives. These functions handle all the setup (opening files, detecting
//! compression, reading existing state) and then delegate to the shared
//! `WriteContext` for the actual observation writing.
//!
//! ## Architecture
//!
//! ```text
//!                  ┌─────────────────┐
//!                  │  archive_ops.rs │
//!                  │  (this module)  │
//!                  └────────┬────────┘
//!                           │
//!         ┌─────────────────┼─────────────────┐
//!         │                 │                 │
//!         ▼                 ▼                 ▼
//! ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
//! │ archive_open  │ │archive_context│ │ archive_reader│
//! │ (compression) │ │ (WriteContext)│ │   (parsing)   │
//! └───────────────┘ └───────────────┘ └───────────────┘
//! ```
//!
//! ## Operations
//!
//! - `create_archive`: Create a new archive from one or more JSON files
//! - `append_to_archive`: Add observations to an existing archive
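//
// Typical use of the two entry points, as an illustrative sketch (the paths and
// snapshot interval are made up; both functions return an empty Vec on success):
//
//     let diags = create_archive(&["day1.json", "day2.json"], "state.json.archive", None, Some(10));
//     assert!(diags.is_empty());
//     let diags = append_to_archive("state.json.archive", &["day3.json"], "state.json.archive", None, None);
//     assert!(diags.is_empty());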
|
||||
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufWriter, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::archive_context::{FinishStrategy, WriteContext};
|
||||
use crate::archive_open::{check_compression_support, detect_archive_compression, open_archive};
|
||||
use crate::archive_reader::{ArchiveReader, ReadMode};
|
||||
use crate::atomic_file::generate_temp_filename;
|
||||
use crate::detection::CompressionFormat;
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode};
|
||||
use crate::events::Header;
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
use crate::archive_context::{CompressedWriteContext, CompressedWriter};
|
||||
|
||||
/// Create a new archive from a list of JSON files.
|
||||
///
|
||||
/// The first file becomes the initial state in the header. Each subsequent
|
||||
/// file generates an observation with the diff from the previous state.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input_files` - List of JSON files to process (at least one required)
|
||||
/// * `output_path` - Path for the new archive file
|
||||
/// * `source` - Optional source identifier for the header
|
||||
/// * `snapshot_interval` - Optional interval for writing snapshots
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns an empty Vec on success, or a Vec of diagnostics on error.
|
||||
pub fn create_archive<P: AsRef<Path>>(
|
||||
input_files: &[P],
|
||||
output_path: P,
|
||||
source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
if input_files.is_empty() {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I need at least one input file to create an archive.".to_string(),
|
||||
)];
|
||||
}
|
||||
|
||||
// Read and parse the first file to get initial state
|
||||
let first_path = input_files[0].as_ref();
|
||||
let first_content = match std::fs::read_to_string(first_path) {
|
||||
Ok(content) => content,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the first input file '{}': {}", first_path.display(), e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let initial_state: Value = match serde_json::from_str(&first_content) {
|
||||
Ok(state) => state,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse '{}' as JSON: {}", first_path.display(), e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())];
|
||||
}
|
||||
};
|
||||
|
||||
// Create the output file
|
||||
let output_path = output_path.as_ref();
|
||||
let file = match File::create(output_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create the output file '{}': {}", output_path.display(), e),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure you have write permission in this directory and that the path is valid."
|
||||
.to_string(),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let mut writer = BufWriter::new(file);
|
||||
|
||||
// Write the header
|
||||
let header = Header::new(initial_state.clone(), source);
|
||||
let header_json = match serde_json::to_string(&header) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the header to JSON: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = writeln!(writer, "{}", header_json) {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output file: {}", e),
|
||||
)];
|
||||
}
|
||||
|
||||
// If there are more files, process them through WriteContext
|
||||
if input_files.len() > 1 {
|
||||
let mut ctx = WriteContext::new(
|
||||
writer,
|
||||
initial_state,
|
||||
0,
|
||||
snapshot_interval,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
|
||||
// Process remaining files (skip the first one which is now the initial state)
|
||||
let remaining_files: Vec<&Path> = input_files[1..].iter().map(|p| p.as_ref()).collect();
|
||||
if let Err(diagnostics) = ctx.write_observations(&remaining_files) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
if let Err(diagnostics) = ctx.finish() {
|
||||
return diagnostics;
|
||||
}
|
||||
} else {
|
||||
// Just flush the header
|
||||
if let Err(e) = writer.flush() {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush the output file: {}", e),
|
||||
)];
|
||||
}
|
||||
}
|
||||
|
||||
Vec::new()
|
||||
}
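// For reference, a freshly created archive starts with a single JSON header line.
// Judging from the tests in this commit it looks roughly like the following (the
// remaining fields depend on how Header serializes its initial state and source):
//
//     {"type":"@peoplesgrocers/json-archive","version":1, ... }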
|
||||
|
||||
/// Append observations to an existing archive.
|
||||
///
|
||||
/// This function handles both compressed and uncompressed archives:
|
||||
/// - Uncompressed: Opens in append mode and writes new observations directly
|
||||
/// - Compressed: Reads entire archive, writes to temp file, atomic swap
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `archive_path` - Path to the existing archive
|
||||
/// * `new_files` - List of JSON files to add as observations
|
||||
/// * `output_path` - Where to write the result (can be same as archive_path)
|
||||
/// * `source` - Optional source identifier (not currently used for append)
|
||||
/// * `snapshot_interval` - Optional interval for writing snapshots
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns an empty Vec on success, or a Vec of diagnostics on error.
|
||||
pub fn append_to_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
_source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let archive_path = archive_path.as_ref();
|
||||
let output_path = output_path.as_ref();
|
||||
|
||||
// Detect compression format
|
||||
let format = match detect_archive_compression(archive_path) {
|
||||
Ok(f) => f,
|
||||
Err(diag) => return vec![diag],
|
||||
};
|
||||
|
||||
// Check if this build supports the detected compression
|
||||
if let Err(diag) = check_compression_support(format, &archive_path.display().to_string()) {
|
||||
return vec![diag];
|
||||
}
|
||||
|
||||
if format == CompressionFormat::None {
|
||||
append_to_uncompressed_archive(archive_path, new_files, output_path, snapshot_interval)
|
||||
} else {
|
||||
append_to_compressed_archive(archive_path, new_files, output_path, format, snapshot_interval)
|
||||
}
|
||||
}
|
||||
|
||||
/// Append to an uncompressed archive.
|
||||
///
|
||||
/// This reads the archive to get the final state, then opens the file
|
||||
/// in append mode to add new observations.
|
||||
fn append_to_uncompressed_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let archive_path = archive_path.as_ref();
|
||||
let output_path = output_path.as_ref();
|
||||
|
||||
// Read the existing archive to get final state
|
||||
let reader = match ArchiveReader::new(archive_path, ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive for reading: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let read_result = match reader.read(archive_path) {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the archive: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Check for fatal diagnostics in the archive
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
let mut diagnostics = vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
"The existing archive contains fatal errors. Cannot append to a corrupt archive."
|
||||
.to_string(),
|
||||
)];
|
||||
diagnostics.extend(read_result.diagnostics.into_diagnostics());
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// If output path is different from archive path, copy the archive first
|
||||
if archive_path != output_path {
|
||||
if let Err(e) = std::fs::copy(archive_path, output_path) {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't copy the archive to the output location: {}", e),
|
||||
)];
|
||||
}
|
||||
}
|
||||
|
||||
// Open file in append mode
|
||||
let file = match OpenOptions::new().append(true).open(output_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file for appending: {}", e),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure the archive file exists and you have write permission.".to_string(),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Create write context and process files
|
||||
let mut ctx = WriteContext::with_diagnostics(
|
||||
file,
|
||||
read_result.final_state,
|
||||
read_result.observation_count,
|
||||
snapshot_interval,
|
||||
FinishStrategy::FlushOnly,
|
||||
read_result.diagnostics,
|
||||
);
|
||||
|
||||
let file_refs: Vec<&Path> = new_files.iter().map(|p| p.as_ref()).collect();
|
||||
if let Err(diagnostics) = ctx.write_observations(&file_refs) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
match ctx.finish() {
|
||||
Ok(collector) => collector.into_diagnostics(),
|
||||
Err(diagnostics) => diagnostics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Append to a compressed archive.
|
||||
///
|
||||
/// This reads the entire archive (decompressing), writes everything to a
|
||||
/// new compressed temp file with the new observations, then atomically
|
||||
/// swaps the temp file with the original.
|
||||
#[cfg(feature = "compression")]
|
||||
fn append_to_compressed_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
format: CompressionFormat,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let archive_path = archive_path.as_ref();
|
||||
let output_path = output_path.as_ref();
|
||||
|
||||
// Step 1: Open and decompress the archive, reading all bytes
|
||||
let opened = match open_archive(archive_path) {
|
||||
Ok(o) => o,
|
||||
Err(diag) => return vec![diag],
|
||||
};
|
||||
|
||||
// Read all decompressed bytes into memory
|
||||
let mut decompressed_bytes = Vec::new();
|
||||
let mut reader = opened.reader;
|
||||
if let Err(e) = reader.read_to_end(&mut decompressed_bytes) {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the compressed archive: {}", e),
|
||||
)];
|
||||
}
|
||||
|
||||
// Step 2: Parse the archive to get final state using AppendSeek mode
|
||||
// We need to re-read from the decompressed bytes
|
||||
let archive_reader = match ArchiveReader::new(archive_path, ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create archive reader: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let read_result = match archive_reader.read(archive_path) {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't parse the archive: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Check for fatal diagnostics
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
let mut diagnostics = vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
"The existing archive contains fatal errors. Cannot append to a corrupt archive."
|
||||
.to_string(),
|
||||
)];
|
||||
diagnostics.extend(read_result.diagnostics.into_diagnostics());
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// Step 3: Create temp file with same compression format
|
||||
let temp_path = generate_temp_filename(output_path);
|
||||
let temp_file = match File::create(&temp_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create temp file: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Create compressed writer
|
||||
let compressed_writer = match CompressedWriter::new(format, temp_file) {
|
||||
Ok(w) => w,
|
||||
Err(diag) => {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
return vec![diag];
|
||||
}
|
||||
};
|
||||
|
||||
// Step 4: Create write context and copy old data + write new observations
|
||||
let mut ctx = CompressedWriteContext::new(
|
||||
compressed_writer,
|
||||
read_result.final_state,
|
||||
read_result.observation_count,
|
||||
snapshot_interval,
|
||||
FinishStrategy::AtomicReplace {
|
||||
temp_path: temp_path.clone(),
|
||||
output_path: output_path.to_path_buf(),
|
||||
},
|
||||
read_result.diagnostics,
|
||||
);
|
||||
|
||||
// Write all old decompressed bytes first
|
||||
if let Err(diagnostics) = ctx.write_raw(&decompressed_bytes) {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// Write new observations
|
||||
let file_refs: Vec<&Path> = new_files.iter().map(|p| p.as_ref()).collect();
|
||||
if let Err(diagnostics) = ctx.write_observations(&file_refs) {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// Finish (this handles compression finalization and atomic swap)
|
||||
match ctx.finish() {
|
||||
Ok(collector) => collector.into_diagnostics(),
|
||||
Err(diagnostics) => {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
diagnostics
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stub for when compression feature is not enabled.
|
||||
#[cfg(not(feature = "compression"))]
|
||||
fn append_to_compressed_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
_new_files: &[Q],
|
||||
_output_path: P,
|
||||
format: CompressionFormat,
|
||||
_snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let format_name = match format {
|
||||
CompressionFormat::Gzip => "gzip",
|
||||
CompressionFormat::Deflate => "deflate",
|
||||
CompressionFormat::Zlib => "zlib",
|
||||
CompressionFormat::Brotli => "brotli",
|
||||
CompressionFormat::Zstd => "zstd",
|
||||
CompressionFormat::None => unreachable!(),
|
||||
};
|
||||
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
format!(
|
||||
"I detected a {}-compressed archive, but this build doesn't support compression.",
|
||||
format_name
|
||||
),
|
||||
)
|
||||
.with_location(archive_path.as_ref().display().to_string(), 1)
|
||||
.with_advice(
|
||||
"This binary was built without compression support.\n\
|
||||
Install with compression: cargo install json-archive --features compression\n\
|
||||
Or decompress the file first."
|
||||
.to_string(),
|
||||
)]
|
||||
}
|
||||
|
||||
/// Generate default output filename from input filename.
|
||||
///
|
||||
/// - `test.json` -> `test.json.archive`
|
||||
/// - `test.txt` -> `test.txt.json.archive`
|
||||
/// - `test` -> `test.json.archive`
|
||||
/// - `test.json.archive` -> `test.json.archive` (unchanged)
|
||||
pub fn default_output_filename<P: AsRef<Path>>(input_path: P) -> PathBuf {
|
||||
let path = input_path.as_ref();
|
||||
let mut output = path.to_path_buf();
|
||||
|
||||
// If it already ends with .json.archive, don't modify it
|
||||
if let Some(filename) = path.file_name() {
|
||||
if let Some(filename_str) = filename.to_str() {
|
||||
if filename_str.ends_with(".json.archive") {
|
||||
return output;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add .json.archive extension
|
||||
if let Some(extension) = path.extension() {
|
||||
if extension == "json" {
|
||||
// Replace .json with .json.archive
|
||||
output.set_extension("json.archive");
|
||||
} else {
|
||||
// Append .json.archive to whatever extension exists
|
||||
let new_extension = format!("{}.json.archive", extension.to_string_lossy());
|
||||
output.set_extension(new_extension);
|
||||
}
|
||||
} else {
|
||||
// No extension, just add .json.archive
|
||||
output.set_extension("json.archive");
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
use std::io::Write as IoWrite;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_create_archive_single_file() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create input file
|
||||
let mut input_file = NamedTempFile::new()?;
|
||||
writeln!(input_file, r#"{{"count": 0, "name": "test"}}"#)?;
|
||||
input_file.flush()?;
|
||||
|
||||
// Create output file
|
||||
let output_file = NamedTempFile::new()?;
|
||||
|
||||
let diagnostics = create_archive(
|
||||
&[input_file.path()],
|
||||
output_file.path(),
|
||||
Some("test-source".to_string()),
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(diagnostics.is_empty(), "Expected no errors: {:?}", diagnostics);
|
||||
|
||||
// Verify the output
|
||||
let content = std::fs::read_to_string(output_file.path())?;
|
||||
let header: Header = serde_json::from_str(content.lines().next().unwrap())?;
|
||||
assert_eq!(header.file_type, "@peoplesgrocers/json-archive");
|
||||
assert_eq!(header.version, 1);
|
||||
assert_eq!(header.initial, json!({"count": 0, "name": "test"}));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_archive_multiple_files() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create input files
|
||||
let mut file1 = NamedTempFile::new()?;
|
||||
let mut file2 = NamedTempFile::new()?;
|
||||
writeln!(file1, r#"{{"count": 0}}"#)?;
|
||||
writeln!(file2, r#"{{"count": 1}}"#)?;
|
||||
file1.flush()?;
|
||||
file2.flush()?;
|
||||
|
||||
let output_file = NamedTempFile::new()?;
|
||||
|
||||
let diagnostics = create_archive(
|
||||
&[file1.path(), file2.path()],
|
||||
output_file.path(),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(diagnostics.is_empty(), "Expected no errors: {:?}", diagnostics);
|
||||
|
||||
// Verify output has header + observation events
|
||||
let content = std::fs::read_to_string(output_file.path())?;
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
assert!(lines.len() >= 3); // header + comment + observe + change
|
||||
|
||||
// First line should be header
|
||||
let header: Header = serde_json::from_str(lines[0])?;
|
||||
assert_eq!(header.initial, json!({"count": 0}));
|
||||
|
||||
// Should contain observe and change events
|
||||
assert!(content.contains("observe"));
|
||||
assert!(content.contains("change"));
|
||||
assert!(content.contains("/count"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_append_to_uncompressed_archive() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create initial archive
|
||||
let mut archive_file = NamedTempFile::new()?;
|
||||
let header = Header::new(json!({"count": 0}), None);
|
||||
writeln!(archive_file, "{}", serde_json::to_string(&header)?)?;
|
||||
archive_file.flush()?;
|
||||
|
||||
// Create file to append
|
||||
let mut new_file = NamedTempFile::new()?;
|
||||
writeln!(new_file, r#"{{"count": 1}}"#)?;
|
||||
new_file.flush()?;
|
||||
|
||||
let diagnostics = append_to_archive(
|
||||
archive_file.path(),
|
||||
&[new_file.path()],
|
||||
archive_file.path(),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(diagnostics.is_empty(), "Expected no errors: {:?}", diagnostics);
|
||||
|
||||
// Verify the archive was updated
|
||||
let content = std::fs::read_to_string(archive_file.path())?;
|
||||
assert!(content.contains("observe"));
|
||||
assert!(content.contains("change"));
|
||||
assert!(content.contains("/count"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default_output_filename() {
|
||||
assert_eq!(
|
||||
default_output_filename("test.json"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test.txt"),
|
||||
PathBuf::from("test.txt.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test.json.archive"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, Diagno
|
|||
use crate::event_deserialize::EventDeserializer;
|
||||
use crate::events::{Event, Header};
|
||||
use crate::pointer::JsonPointer;
|
||||
use crate::detection::{CompressionFormat, detect_compression_format};
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
use flate2::read::{DeflateDecoder, GzDecoder, ZlibDecoder};
|
||||
|
|
@ -43,16 +44,6 @@ pub enum ReadMode {
|
|||
AppendSeek,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum CompressionFormat {
|
||||
Gzip,
|
||||
Deflate,
|
||||
Zlib,
|
||||
Brotli,
|
||||
Zstd,
|
||||
None,
|
||||
}
|
||||
|
||||
pub struct ArchiveReader {
|
||||
mode: ReadMode,
|
||||
filename: String,
|
||||
|
|
@ -99,8 +90,7 @@ impl Iterator for EventIterator {
|
|||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
self.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse this line as JSON: {}", e),
|
||||
)
|
||||
|
|
@ -137,8 +127,7 @@ impl Iterator for EventIterator {
|
|||
}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
|
||||
self.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidUtf8,
|
||||
format!("I found invalid UTF-8 bytes at line {}.", self.line_number)
|
||||
)
|
||||
|
|
@ -157,40 +146,6 @@ impl Iterator for EventIterator {
|
|||
}
|
||||
}
|
||||
|
||||
fn detect_compression_format(path: &Path, bytes: &[u8]) -> CompressionFormat {
|
||||
if bytes.len() < 4 {
|
||||
return CompressionFormat::None;
|
||||
}
|
||||
|
||||
// Gzip magic number: 0x1f 0x8b
|
||||
if bytes[0] == 0x1f && bytes[1] == 0x8b {
|
||||
return CompressionFormat::Gzip;
|
||||
}
|
||||
|
||||
// Zlib magic number: 0x78 followed by 0x01, 0x5e, 0x9c, or 0xda
|
||||
if bytes[0] == 0x78 && (bytes[1] == 0x01 || bytes[1] == 0x5e || bytes[1] == 0x9c || bytes[1] == 0xda) {
|
||||
return CompressionFormat::Zlib;
|
||||
}
|
||||
|
||||
// Zstd magic number: 0x28 0xb5 0x2f 0xfd
|
||||
if bytes.len() >= 4 && bytes[0] == 0x28 && bytes[1] == 0xb5 && bytes[2] == 0x2f && bytes[3] == 0xfd {
|
||||
return CompressionFormat::Zstd;
|
||||
}
|
||||
|
||||
// Check file extension for brotli (no reliable magic number) and deflate
|
||||
if let Some(ext) = path.extension() {
|
||||
let ext_str = ext.to_string_lossy();
|
||||
if ext_str == "br" || path.to_string_lossy().contains(".br.") {
|
||||
return CompressionFormat::Brotli;
|
||||
}
|
||||
if ext_str == "deflate" {
|
||||
return CompressionFormat::Deflate;
|
||||
}
|
||||
}
|
||||
|
||||
CompressionFormat::None
|
||||
}
|
||||
|
||||
impl ArchiveReader {
|
||||
pub fn new<P: AsRef<Path>>(path: P, mode: ReadMode) -> std::io::Result<Self> {
|
||||
let filename = path.as_ref().display().to_string();
|
||||
|
|
@ -224,8 +179,7 @@ impl ArchiveReader {
|
|||
};
|
||||
|
||||
diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
format!("I detected a {}-compressed archive, but this build doesn't support compression.", format_name)
|
||||
)
|
||||
|
|
@ -271,8 +225,7 @@ impl ArchiveReader {
|
|||
Ok(0) => {
|
||||
// Empty file
|
||||
diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::EmptyFile,
|
||||
"I found an empty file, but I need at least a header line.".to_string(),
|
||||
)
|
||||
|
|
@ -295,8 +248,7 @@ impl ArchiveReader {
|
|||
Err(e) if e.kind() == std::io::ErrorKind::InvalidData => {
|
||||
// UTF-8 error
|
||||
diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidUtf8,
|
||||
"I found invalid UTF-8 bytes at line 1.".to_string()
|
||||
)
|
||||
|
|
@ -420,8 +372,7 @@ impl ArchiveReader {
|
|||
&& !seen_observations.contains(&observation_id)
|
||||
{
|
||||
event_iter.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::NonExistentObservationId,
|
||||
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
|
||||
)
|
||||
|
|
@ -447,8 +398,7 @@ impl ArchiveReader {
|
|||
&& !seen_observations.contains(&observation_id)
|
||||
{
|
||||
event_iter.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::NonExistentObservationId,
|
||||
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
|
||||
)
|
||||
|
|
@ -470,8 +420,7 @@ impl ArchiveReader {
|
|||
&& !seen_observations.contains(&observation_id)
|
||||
{
|
||||
event_iter.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::NonExistentObservationId,
|
||||
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
|
||||
)
|
||||
|
|
@ -493,8 +442,7 @@ impl ArchiveReader {
|
|||
&& !seen_observations.contains(&observation_id)
|
||||
{
|
||||
event_iter.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::NonExistentObservationId,
|
||||
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
|
||||
)
|
||||
|
|
@ -512,8 +460,7 @@ impl ArchiveReader {
|
|||
Event::Snapshot { observation_id: _, timestamp: _, object } => {
|
||||
if self.mode == ReadMode::FullValidation && state != object {
|
||||
event_iter.diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::SnapshotStateMismatch,
|
||||
"I found a snapshot whose state doesn't match the replayed state up to this point.".to_string()
|
||||
)
|
||||
|
|
@ -566,8 +513,7 @@ impl ArchiveReader {
|
|||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::MissingHeader,
|
||||
format!("I couldn't parse the header as JSON: {}", e),
|
||||
)
|
||||
|
|
@ -587,8 +533,7 @@ impl ArchiveReader {
|
|||
Ok(header) => {
|
||||
if header.version != 1 {
|
||||
diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
format!("I found version {}, but I only support version 1.", header.version)
|
||||
)
|
||||
|
|
@ -606,8 +551,7 @@ impl ArchiveReader {
|
|||
}
|
||||
Err(e) => {
|
||||
diagnostics.add(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
format!("I couldn't parse the header: {}", e),
|
||||
)
|
||||
|
|
@ -666,12 +610,11 @@ pub fn apply_move(
|
|||
) -> Result<(), Diagnostic> {
|
||||
let pointer = JsonPointer::new(path)?;
|
||||
|
||||
let array = pointer.get(state)?;
|
||||
let array = pointer.get_mut(state)?;
|
||||
|
||||
if !array.is_array() {
|
||||
return Err(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::MoveOnNonArray,
|
||||
format!(
|
||||
"I can't apply move operations to '{}' because it's not an array.",
|
||||
|
|
@ -686,48 +629,41 @@ pub fn apply_move(
|
|||
);
|
||||
}
|
||||
|
||||
let mut arr = array.as_array().unwrap().clone();
|
||||
let arr = array.as_array_mut().unwrap();
|
||||
|
||||
for (from_idx, to_idx) in moves {
|
||||
if from_idx >= arr.len() {
|
||||
return Err(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MoveIndexOutOfBounds,
|
||||
format!(
|
||||
"The 'from' index {} is out of bounds (array length is {}).",
|
||||
from_idx,
|
||||
arr.len()
|
||||
),
|
||||
)
|
||||
);
|
||||
// Validate all moves upfront before mutating
|
||||
for (from_idx, to_idx) in &moves {
|
||||
if *from_idx >= arr.len() {
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::MoveIndexOutOfBounds,
|
||||
format!(
|
||||
"The 'from' index {} is out of bounds (array length is {}).",
|
||||
from_idx,
|
||||
arr.len()
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
if to_idx > arr.len() {
|
||||
return Err(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MoveIndexOutOfBounds,
|
||||
format!(
|
||||
"The 'to' index {} is out of bounds (array length is {}).",
|
||||
to_idx,
|
||||
arr.len()
|
||||
),
|
||||
)
|
||||
);
|
||||
if *to_idx > arr.len() {
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::MoveIndexOutOfBounds,
|
||||
format!(
|
||||
"The 'to' index {} is out of bounds (array length is {}).",
|
||||
to_idx,
|
||||
arr.len()
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let element = arr[from_idx].clone();
|
||||
arr.insert(to_idx, element);
|
||||
let remove_idx = if from_idx > to_idx {
|
||||
from_idx + 1
|
||||
} else {
|
||||
from_idx
|
||||
};
|
||||
arr.remove(remove_idx);
|
||||
}
|
||||
|
||||
pointer.set(state, Value::Array(arr))
|
||||
// Apply moves now that we know they're all valid
|
||||
for (from_idx, to_idx) in moves {
|
||||
let element = arr.remove(from_idx);
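// After remove(from_idx), every element past from_idx shifts left by one, so a
// destination index that was beyond from_idx must drop by one. For example, with
// ["a", "b", "c"] and a move (0, 2): remove "a" -> ["b", "c"], insert at 2 - 1 = 1 -> ["b", "a", "c"].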
|
||||
let insert_idx = if to_idx > from_idx { to_idx - 1 } else { to_idx };
|
||||
arr.insert(insert_idx, element);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
1040 src/archive_writer.rs (new file; diff suppressed because it is too large)
253 src/atomic_file.rs (new file)
|
|
@ -0,0 +1,253 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Problem: how do you append data to a compressed archive without losing data?
|
||||
//!
|
||||
//! Gzip and similar formats don't support in-place append. To add one record
|
||||
//! to a 20GB archive, you decompress it all, add the record, and recompress.
|
||||
//!
|
||||
//! You have two options:
|
||||
//!
|
||||
//! Option A: Overwrite in place. Seek to byte 0 of the existing file and start
|
||||
//! writing the new compressed stream. No extra disk space needed. But if you
|
||||
//! fail mid-write (out of space, crash, power loss), you've corrupted the
|
||||
//! original and lost everything. With a 20GB file, that's a lot of time spent
|
||||
//! in the danger zone.
|
||||
//!
|
||||
//! Option B: Write to a new file, then swap. Requires 2x disk space temporarily,
|
||||
//! but the original stays intact until the new file is complete. If writing
|
||||
//! fails, you just delete the partial temp file.
|
||||
//!
|
||||
//! This module implements option B. I'm not comfortable with option A.
|
||||
//!
|
||||
//! The swap sequence:
|
||||
//! 1. Write new archive to `.archive.json.gz.a7bX2q`
|
||||
//! 2. Rename original to `.archive.json.gz.a7bX2q.old` (backup)
|
||||
//! 3. Rename temp to `archive.json.gz` (atomic on same filesystem)
|
||||
//! 4. Delete backup
|
||||
//!
|
||||
//! If writing fails, original is untouched. If the swap fails, we restore
|
||||
//! from backup. Even a crash between steps 2 and 3 doesn't lose data; the original just sits under the backup name until it's renamed back by hand.
|
||||
//!
|
||||
//! Assumes everything is on one filesystem. Cross-filesystem renames aren't
|
||||
//! atomic and we don't handle them.
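//!
//! A minimal usage sketch, assuming the caller has already produced the new
//! compressed bytes (the real append path lives in `archive_writer.rs` and may
//! differ in details):
//!
//! ```ignore
//! let archive = std::path::PathBuf::from("data.json.archive.gz");
//! let temp = generate_temp_filename(&archive);
//! // Step 1: write the complete, recompressed archive to the temp file.
//! std::fs::write(&temp, &recompressed_bytes)?;
//! // Steps 2-4: back up the original, swap the temp into place, drop the backup.
//! if let Err(diagnostics) = atomic_replace_file(&archive, &temp) {
//!     // report diagnostics; the original (or its backup) is left on disk
//! }
//! ```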
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
|
||||
/// Generate a rsync-style temporary filename with dot prefix and random suffix
|
||||
///
|
||||
/// For example: "archive.json.gz" -> ".archive.json.gz.a7bX2q"
|
||||
///
|
||||
/// The naming convention follows rsync's pattern:
|
||||
/// - Prefix with `.` to hide the file on Unix systems
|
||||
/// - Append a 6-character random suffix for uniqueness
|
||||
pub fn generate_temp_filename<P: AsRef<Path>>(path: P) -> PathBuf {
|
||||
let path = path.as_ref();
|
||||
|
||||
// Generate 6-character random suffix using first 6 hex chars of a uuid
|
||||
let uuid = Uuid::new_v4();
|
||||
let hex = format!("{:x}", uuid.as_u128());
|
||||
let random_suffix = &hex[..6];
|
||||
|
||||
// Get the filename
|
||||
if let Some(filename) = path.file_name() {
|
||||
if let Some(filename_str) = filename.to_str() {
|
||||
// Create new filename: .{original}.{random}
|
||||
let temp_filename = format!(".{}.{}", filename_str, random_suffix);
|
||||
|
||||
// Return path with new filename
|
||||
if let Some(parent) = path.parent() {
|
||||
return parent.join(temp_filename);
|
||||
} else {
|
||||
return PathBuf::from(temp_filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: just add prefix and suffix to entire path
|
||||
let mut temp_path = path.to_path_buf();
|
||||
temp_path.set_file_name(format!(".{}.{}", path.display(), random_suffix));
|
||||
temp_path
|
||||
}
|
||||
|
||||
/// Atomically replace a file using rsync-style temp files
|
||||
///
|
||||
/// This performs the following sequence:
|
||||
/// 1. Write new content to temp_path (caller's responsibility - already done)
|
||||
/// 2. Move original_path -> .original_path.{random}.old (backup)
|
||||
/// 3. Move temp_path -> original_path (replace)
|
||||
/// 4. Delete .original_path.{random}.old (cleanup)
|
||||
///
|
||||
/// If any step fails, attempts to recover by restoring the backup.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `original_path` - The file to be replaced
|
||||
/// * `temp_path` - The temporary file containing the new content
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns diagnostics if any step of the operation fails. The function
|
||||
/// attempts automatic recovery by restoring the backup if the replacement fails.
|
||||
pub fn atomic_replace_file<P: AsRef<Path>>(original_path: P, temp_path: P) -> Result<(), Vec<Diagnostic>> {
|
||||
let original = original_path.as_ref();
|
||||
let temp = temp_path.as_ref();
|
||||
|
||||
// Generate backup filename with same random suffix as temp file
|
||||
let backup_path = if let Some(filename) = original.file_name() {
|
||||
if let Some(filename_str) = filename.to_str() {
|
||||
// Extract random suffix from temp filename if it follows our pattern
|
||||
let temp_filename = temp.file_name().and_then(|f| f.to_str()).unwrap_or("");
|
||||
let random_suffix = if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
|
||||
// Extract suffix after the original filename
|
||||
temp_filename.rsplit('.').next().unwrap_or("backup")
|
||||
} else {
|
||||
"backup"
|
||||
};
|
||||
|
||||
let backup_filename = format!(".{}.{}.old", filename_str, random_suffix);
|
||||
if let Some(parent) = original.parent() {
|
||||
parent.join(backup_filename)
|
||||
} else {
|
||||
PathBuf::from(backup_filename)
|
||||
}
|
||||
} else {
|
||||
original.with_extension("old")
|
||||
}
|
||||
} else {
|
||||
original.with_extension("old")
|
||||
};
|
||||
|
||||
// Step 1: Move original to backup
|
||||
if let Err(e) = std::fs::rename(original, &backup_path) {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create backup of the original archive: {}", e),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure you have write permission in this directory and sufficient disk space."
|
||||
.to_string()
|
||||
)]);
|
||||
}
|
||||
|
||||
// Step 2: Move temp to original
|
||||
if let Err(e) = std::fs::rename(temp, original) {
|
||||
// Recovery: Try to restore backup
|
||||
let recovery_error = if std::fs::rename(&backup_path, original).is_ok() {
|
||||
format!(
|
||||
"I couldn't move the new archive into place: {}\nI've restored the original archive from backup.",
|
||||
e
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"I couldn't move the new archive into place: {}\nWARNING: I also failed to restore the backup. Your original is at: {}",
|
||||
e,
|
||||
backup_path.display()
|
||||
)
|
||||
};
|
||||
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
recovery_error,
|
||||
)
|
||||
.with_advice(
|
||||
"Check filesystem permissions and disk space. If the backup exists, you can manually restore it."
|
||||
.to_string()
|
||||
)]);
|
||||
}
|
||||
|
||||
// Step 3: Delete backup
|
||||
// This is non-critical - if it fails, we just leave the backup around
|
||||
let _ = std::fs::remove_file(&backup_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_generate_temp_filename() {
|
||||
let temp = generate_temp_filename("archive.json.gz");
|
||||
let filename = temp.file_name().unwrap().to_str().unwrap();
|
||||
|
||||
// Should start with dot
|
||||
assert!(filename.starts_with('.'));
|
||||
|
||||
// Should contain original filename
|
||||
assert!(filename.contains("archive.json.gz"));
|
||||
|
||||
// Should have a random suffix (dot followed by 6 chars)
|
||||
assert!(filename.matches('.').count() >= 3); // ".archive.json.gz.<suffix>" actually has 4 dots; 3 is a loose lower bound
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_atomic_replace_file() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create original file
|
||||
let mut original = NamedTempFile::new()?;
|
||||
writeln!(original, "original content")?;
|
||||
original.flush()?;
|
||||
let original_path = original.path().to_path_buf();
|
||||
|
||||
// Create temp file with new content
|
||||
let temp_path = generate_temp_filename(&original_path);
|
||||
{
|
||||
let mut temp_file = File::create(&temp_path)?;
|
||||
writeln!(temp_file, "new content")?;
|
||||
}
|
||||
|
||||
// Perform atomic replace
|
||||
atomic_replace_file(&original_path, &temp_path)
|
||||
.map_err(|e| format!("Failed to replace file: {:?}", e))?;
|
||||
|
||||
// Verify new content
|
||||
let content = std::fs::read_to_string(&original_path)?;
|
||||
assert_eq!(content.trim(), "new content");
|
||||
|
||||
// Verify temp file is gone
|
||||
assert!(!temp_path.exists());
|
||||
|
||||
// Verify backup is cleaned up
|
||||
let backup_pattern = format!(".{}.", original_path.file_name().unwrap().to_str().unwrap());
|
||||
let parent = original_path.parent().unwrap();
|
||||
let backups: Vec<_> = std::fs::read_dir(parent)?
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| {
|
||||
e.file_name()
|
||||
.to_str()
|
||||
.map(|s| s.contains(&backup_pattern) && s.ends_with(".old"))
|
||||
.unwrap_or(false)
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(backups.len(), 0, "Backup file should be cleaned up");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
135 src/bin/pointer_errors_demo.rs (new file)
|
|
@ -0,0 +1,135 @@
|
|||
// Generates documentation for JSON pointer diagnostics.
|
||||
//
|
||||
// Run with: cargo run --bin pointer_errors_demo > docs/diagnostics/json-pointer.md
|
||||
|
||||
use json_archive::JsonPointer;
|
||||
use serde_json::json;
|
||||
|
||||
fn print_example(pointer_str: &str, value: &mut serde_json::Value) {
|
||||
println!("```");
|
||||
let pointer = JsonPointer::new(pointer_str).unwrap();
|
||||
if let Err(diag) = pointer.get_mut(value) {
|
||||
print!("{}", diag);
|
||||
}
|
||||
println!("```");
|
||||
}
|
||||
|
||||
fn main() {
|
||||
print!(r#"<!-- Generated by: cargo run --bin pointer_errors_demo > docs/diagnostics/json-pointer.md -->
|
||||
|
||||
# JSON Pointer Diagnostics
|
||||
|
||||
These are the error messages you'll see when a [JSON Pointer (RFC 6901)](https://datatracker.ietf.org/doc/html/rfc6901)
|
||||
operation fails.
|
||||
|
||||
## Why These Errors Are Limited
|
||||
|
||||
The JSON object that failed to index probably doesn't exist anywhere as a file. It's
|
||||
built by replaying delta events from the archive. The filename and line numbers in
|
||||
these errors point to the source of the JSON pointer paths—the add/change/remove
|
||||
events in the archive—not to the object itself.
|
||||
|
||||
A proper solution would dump the reconstructed JSON object to a file so you could
|
||||
inspect it with `jq` or a text editor. That engineering work didn't happen.
|
||||
|
||||
Instead, you get:
|
||||
|
||||
- The pointer path that failed, with the failing segment underlined
|
||||
- The actual value at the parent path (truncated)
|
||||
- Some strings you can grep for in the archive
|
||||
|
||||
This is better than nothing, but it's still awkward. You can see *what* failed but
|
||||
not easily inspect the full object we tried to index into. If you're lucky, the
|
||||
truncated value shown is enough. If you're developing on this project, at least
|
||||
you know what the errors look like.
|
||||
|
||||
## Contributing
|
||||
|
||||
If an error message is confusing or unhelpful for your case, please open an issue
|
||||
or submit a pull request.
|
||||
|
||||
## Key Not Found
|
||||
|
||||
Key doesn't exist in the object. Shows available keys and suggests typos.
|
||||
|
||||
"#);
|
||||
|
||||
print_example(
|
||||
"/user/emial",
|
||||
&mut json!({
|
||||
"user": {
|
||||
"name": "Alice",
|
||||
"email": "alice@example.com",
|
||||
"age": 30
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
print!(r#"
|
||||
## Type Mismatch
|
||||
|
||||
Tried to index into a value that doesn't support it (e.g., `/domain` on a string,
|
||||
`/0` on a number). Shows the actual type.
|
||||
|
||||
"#);
|
||||
|
||||
print_example(
|
||||
"/users/0/email/domain",
|
||||
&mut json!({
|
||||
"users": [
|
||||
{"email": "alice@example.com"}
|
||||
]
|
||||
}),
|
||||
);
|
||||
|
||||
print!(r#"
|
||||
## Array Index Out of Bounds
|
||||
|
||||
Index past the end of the array. Shows the array length.
|
||||
|
||||
"#);
|
||||
|
||||
print_example(
|
||||
"/items/5",
|
||||
&mut json!({
|
||||
"items": ["apple", "banana", "cherry"]
|
||||
}),
|
||||
);
|
||||
|
||||
print!(r#"
|
||||
## Invalid Array Index
|
||||
|
||||
If you think you have an object but you're actually indexing into an array, you'll see this error.
|
||||
|
||||
"#);
|
||||
|
||||
print_example(
|
||||
"/items/foo",
|
||||
&mut json!({
|
||||
"items": ["apple", "banana", "cherry"]
|
||||
}),
|
||||
);
|
||||
|
||||
print!(r#"
|
||||
## Deep Path Failures
|
||||
|
||||
For long paths, the underline shows which segment failed. The full path remains
|
||||
visible so you can see what you were trying to reach.
|
||||
|
||||
"#);
|
||||
|
||||
print_example(
|
||||
"/data/users/0/profile/settings/theme",
|
||||
&mut json!({
|
||||
"data": {
|
||||
"users": [
|
||||
{
|
||||
"profile": {
|
||||
"name": "Alice"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}),
|
||||
);
|
||||
}
|
||||
109 src/detection.rs
|
|
@ -31,9 +31,16 @@
|
|||
//! Design choice by @nobody. No user requests for this, just seemed nice.
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::io::{BufRead, BufReader, Read};
|
||||
use std::path::Path;
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
use brotli::Decompressor;
|
||||
#[cfg(feature = "compression")]
|
||||
use flate2::read::{DeflateDecoder, GzDecoder, ZlibDecoder};
|
||||
#[cfg(feature = "compression")]
|
||||
use zstd::stream::read::Decoder as ZstdDecoder;
|
||||
|
||||
/// Detects if a file is a JSON archive by checking file extension or inspecting the header.
|
||||
///
|
||||
/// Detection strategy:
|
||||
|
|
@ -52,20 +59,65 @@ use std::path::Path;
|
|||
pub fn is_json_archive<P: AsRef<Path>>(path: P) -> Result<bool, std::io::Error> {
|
||||
let path = path.as_ref();
|
||||
|
||||
// Check file extension first (fast path)
|
||||
if let Some(filename) = path.file_name() {
|
||||
if let Some(filename_str) = filename.to_str() {
|
||||
if filename_str.ends_with(".json.archive") {
|
||||
// Match .json.archive with any compression suffix
|
||||
if filename_str.ends_with(".json.archive")
|
||||
|| filename_str.ends_with(".json.archive.gz")
|
||||
|| filename_str.ends_with(".json.archive.br")
|
||||
|| filename_str.ends_with(".json.archive.zst")
|
||||
|| filename_str.ends_with(".json.archive.zlib")
|
||||
{
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let file = File::open(path)?;
|
||||
let mut reader = BufReader::new(file);
|
||||
// Open file and detect compression
|
||||
let mut file = File::open(path)?;
|
||||
let mut magic_bytes = [0u8; 4];
|
||||
let bytes_read = file.read(&mut magic_bytes)?;
|
||||
let compression = detect_compression_format(path, &magic_bytes[..bytes_read]);
|
||||
|
||||
// Reopen file to reset position
|
||||
file = File::open(path)?;
|
||||
|
||||
// Create appropriate reader based on compression format
|
||||
let reader: Box<dyn BufRead> = create_reader(file, compression)?;
|
||||
|
||||
check_header_line(reader)
|
||||
}
|
||||
|
||||
/// Create a buffered reader that handles decompression if needed.
|
||||
#[cfg(feature = "compression")]
|
||||
fn create_reader(file: File, compression: CompressionFormat) -> Result<Box<dyn BufRead>, std::io::Error> {
|
||||
Ok(match compression {
|
||||
CompressionFormat::Gzip => Box::new(BufReader::new(GzDecoder::new(file))),
|
||||
CompressionFormat::Deflate => Box::new(BufReader::new(DeflateDecoder::new(file))),
|
||||
CompressionFormat::Zlib => Box::new(BufReader::new(ZlibDecoder::new(file))),
|
||||
CompressionFormat::Brotli => Box::new(BufReader::new(Decompressor::new(file, 4096))),
|
||||
CompressionFormat::Zstd => Box::new(BufReader::new(ZstdDecoder::new(file)?)),
|
||||
CompressionFormat::None => Box::new(BufReader::new(file)),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "compression"))]
|
||||
fn create_reader(file: File, compression: CompressionFormat) -> Result<Box<dyn BufRead>, std::io::Error> {
|
||||
if compression != CompressionFormat::None {
|
||||
// Without compression support, we can't decompress to check the header.
|
||||
// Fall back to an empty reader so the header check below reports false.
|
||||
return Ok(Box::new(BufReader::new(std::io::empty())));
|
||||
}
|
||||
Ok(Box::new(BufReader::new(file)))
|
||||
}
|
||||
|
||||
/// Check if the first line of the reader contains a valid archive header.
|
||||
fn check_header_line(mut reader: Box<dyn BufRead>) -> Result<bool, std::io::Error> {
|
||||
let mut first_line = String::new();
|
||||
|
||||
match reader.read_line(&mut first_line) {
|
||||
Ok(0) => return Ok(false), // Empty file
|
||||
Ok(0) => Ok(false), // Empty file
|
||||
Ok(_) => {
|
||||
// Try to parse as JSON and check if it has our type field as the first key
|
||||
if let Ok(value) = serde_json::from_str::<serde_json::Value>(&first_line) {
|
||||
|
|
@ -81,11 +133,54 @@ pub fn is_json_archive<P: AsRef<Path>>(path: P) -> Result<bool, std::io::Error>
|
|||
}
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
Err(e) => return Err(e),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum CompressionFormat {
|
||||
Gzip,
|
||||
Deflate,
|
||||
Zlib,
|
||||
Brotli,
|
||||
Zstd,
|
||||
None,
|
||||
}
|
||||
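/// Sniff the compression format from the file's leading magic bytes, falling
/// back to the file extension for formats without a reliable signature.
///
/// For example: bytes starting with `0x1f 0x8b` are treated as gzip,
/// `0x28 0xb5 0x2f 0xfd` as zstd, and a `.br` extension as brotli (brotli
/// streams carry no magic number).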
|
||||
pub fn detect_compression_format(path: &Path, bytes: &[u8]) -> CompressionFormat {
|
||||
if bytes.len() < 4 {
|
||||
return CompressionFormat::None;
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
// Gzip magic number: 0x1f 0x8b
|
||||
if bytes[0] == 0x1f && bytes[1] == 0x8b {
|
||||
return CompressionFormat::Gzip;
|
||||
}
|
||||
|
||||
// Zlib magic number: 0x78 followed by 0x01, 0x5e, 0x9c, or 0xda
|
||||
if bytes[0] == 0x78 && (bytes[1] == 0x01 || bytes[1] == 0x5e || bytes[1] == 0x9c || bytes[1] == 0xda) {
|
||||
return CompressionFormat::Zlib;
|
||||
}
|
||||
|
||||
// Zstd magic number: 0x28 0xb5 0x2f 0xfd
|
||||
if bytes.len() >= 4 && bytes[0] == 0x28 && bytes[1] == 0xb5 && bytes[2] == 0x2f && bytes[3] == 0xfd {
|
||||
return CompressionFormat::Zstd;
|
||||
}
|
||||
|
||||
// Check file extension for brotli (no reliable magic number) and deflate
|
||||
if let Some(ext) = path.extension() {
|
||||
let ext_str = ext.to_string_lossy();
|
||||
if ext_str == "br" || path.to_string_lossy().contains(".br.") {
|
||||
return CompressionFormat::Brotli;
|
||||
}
|
||||
if ext_str == "deflate" {
|
||||
return CompressionFormat::Deflate;
|
||||
}
|
||||
}
|
||||
|
||||
CompressionFormat::None
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -188,6 +188,11 @@ impl Diagnostic {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn fatal(code: DiagnosticCode, description: String) -> Self {
|
||||
Self::new(DiagnosticLevel::Fatal, code, description)
|
||||
}
|
||||
|
||||
pub fn with_location(mut self, filename: String, line_number: usize) -> Self {
|
||||
self.filename = Some(filename);
|
||||
self.line_number = Some(line_number);
|
||||
|
|
|
|||
|
|
@ -24,10 +24,13 @@ use std::path::PathBuf;
|
|||
xflags::xflags! {
|
||||
cmd json-archive {
|
||||
default cmd create {
|
||||
/// Input JSON files in chronological order (first file determines default output name)
|
||||
/// Input JSON files in chronological order. If first file is a .json.archive file,
|
||||
/// appends remaining files to it. Otherwise creates a new archive from all files.
|
||||
repeated inputs: PathBuf
|
||||
|
||||
/// Output archive file path (defaults to first input + .json.archive)
|
||||
/// Output archive file path. Defaults to <first-input>.json.archive for new archives,
|
||||
/// or to the archive path itself when appending (in-place update). Use -o to write
|
||||
/// the result to a different location.
|
||||
optional -o, --output output: PathBuf
|
||||
|
||||
/// Insert snapshot every N observations (optional)
|
||||
|
|
|
|||
13 src/lib.rs
|
|
@ -19,7 +19,12 @@
|
|||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
pub mod archive;
|
||||
pub mod archive_context;
|
||||
pub mod archive_open;
|
||||
pub mod archive_ops;
|
||||
pub mod archive_reader;
|
||||
pub mod archive_writer;
|
||||
pub mod atomic_file;
|
||||
pub mod detection;
|
||||
pub mod diagnostics;
|
||||
pub mod diff;
|
||||
|
|
@ -27,13 +32,13 @@ pub mod event_deserialize;
|
|||
pub mod events;
|
||||
pub mod flags;
|
||||
pub mod pointer;
|
||||
pub mod reader;
|
||||
mod pointer_errors;
|
||||
|
||||
pub use archive::{
|
||||
pub use archive_writer::{
|
||||
append_to_archive, create_archive_from_files, default_output_filename, ArchiveBuilder, ArchiveWriter,
|
||||
};
|
||||
pub use detection::is_json_archive;
|
||||
pub use diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
|
||||
pub use events::{Event, Header, Observation};
|
||||
pub use pointer::JsonPointer;
|
||||
pub use reader::{apply_add, apply_change, apply_move, apply_remove, ArchiveReader, ReadMode, ReadResult};
|
||||
pub use archive_reader::{apply_add, apply_change, apply_move, apply_remove, ArchiveReader, ReadMode, ReadResult};
|
||||
|
|
|
|||
154 src/main.rs
|
|
@ -19,10 +19,8 @@
|
|||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
use json_archive::{
|
||||
append_to_archive, create_archive_from_files, default_output_filename, is_json_archive, Diagnostic,
|
||||
DiagnosticCode, DiagnosticLevel,
|
||||
};
|
||||
use json_archive::archive_ops::{append_to_archive, create_archive, default_output_filename};
|
||||
use json_archive::{is_json_archive, Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
|
||||
|
|
@ -46,15 +44,22 @@ fn main() {
|
|||
|
||||
fn run(flags: flags::JsonArchive) -> Vec<Diagnostic> {
|
||||
match flags.subcommand {
|
||||
flags::JsonArchiveCmd::Create(create_flags) => create_archive(&create_flags),
|
||||
flags::JsonArchiveCmd::Create(create_flags) => run_create(&create_flags),
|
||||
flags::JsonArchiveCmd::Info(info_flags) => cmd::info::run(&info_flags),
|
||||
flags::JsonArchiveCmd::State(state_flags) => cmd::state::run(&state_flags),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_archive(flags: &flags::Create) -> Vec<Diagnostic> {
|
||||
struct ParsedCreateArgs {
|
||||
destination: std::path::PathBuf,
|
||||
input_files: Vec<std::path::PathBuf>,
|
||||
}
|
||||
|
||||
/// Parse the create command arguments to determine the destination archive and input files.
|
||||
/// This consolidates all the inferring behavior in one place.
|
||||
fn parse_create_args(flags: &flags::Create) -> Result<ParsedCreateArgs, Vec<Diagnostic>> {
|
||||
if flags.inputs.is_empty() {
|
||||
return vec![Diagnostic::new(
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I need at least one JSON file to create an archive, but you didn't provide any."
|
||||
|
|
@ -65,16 +70,53 @@ fn create_archive(flags: &flags::Create) -> Vec<Diagnostic> {
|
|||
The first file will be used as the initial state, and subsequent files \
|
||||
will be compared to generate change events."
|
||||
.to_string(),
|
||||
)];
|
||||
)]);
|
||||
}
|
||||
|
||||
let output_path = match &flags.output {
|
||||
Some(path) => path.clone(),
|
||||
None => default_output_filename(&flags.inputs[0]),
|
||||
// Determine the destination archive path
|
||||
let destination = if let Some(output) = &flags.output {
|
||||
// Explicitly specified output path
|
||||
output.clone()
|
||||
} else if Path::new(&flags.inputs[0]).exists()
|
||||
&& is_json_archive(&flags.inputs[0]).unwrap_or(false)
|
||||
{
|
||||
// First input is an existing archive - use it as destination
|
||||
flags.inputs[0].clone()
|
||||
} else {
|
||||
// Infer from first input
|
||||
default_output_filename(&flags.inputs[0])
|
||||
};
|
||||
|
||||
// Filter out the destination from input files to avoid read-write conflicts
|
||||
let input_files: Vec<_> = flags.inputs
|
||||
.iter()
|
||||
.filter(|path| {
|
||||
match (std::fs::canonicalize(path).ok(), std::fs::canonicalize(&destination).ok()) {
|
||||
(Some(p), Some(d)) => p != d,
|
||||
_ => true, // Include if canonicalization fails (file doesn't exist yet)
|
||||
}
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if input_files.is_empty() {
|
||||
return Err(vec![
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"No input files remain after filtering out the destination archive.".to_string()
|
||||
)
|
||||
.with_advice(
|
||||
"You specified the output path in the list of input files. This would cause a read-write conflict.\n\
|
||||
Either remove the output path from inputs, or use a different output path with -o."
|
||||
.to_string()
|
||||
)
|
||||
]);
|
||||
}
|
||||
|
||||
// Validate all input files exist
|
||||
let mut diagnostics = Vec::new();
|
||||
for input_path in &flags.inputs {
|
||||
for input_path in &input_files {
|
||||
if !Path::new(input_path).exists() {
|
||||
diagnostics.push(
|
||||
Diagnostic::new(
|
||||
|
|
@ -92,43 +134,21 @@ fn create_archive(flags: &flags::Create) -> Vec<Diagnostic> {
|
|||
}
|
||||
|
||||
if !diagnostics.is_empty() {
|
||||
return diagnostics;
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
let first_is_archive = match is_json_archive(&flags.inputs[0]) {
|
||||
Ok(is_archive) => is_archive,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't check if the first file is an archive: {}", e),
|
||||
)];
|
||||
}
|
||||
Ok(ParsedCreateArgs {
|
||||
destination,
|
||||
input_files,
|
||||
})
|
||||
}
|
||||
|
||||
fn run_create(flags: &flags::Create) -> Vec<Diagnostic> {
|
||||
let parsed = match parse_create_args(flags) {
|
||||
Ok(parsed) => parsed,
|
||||
Err(diagnostics) => return diagnostics,
|
||||
};
|
||||
|
||||
if first_is_archive {
|
||||
println!("First input appears to be a JSON archive file");
|
||||
if flags.inputs.len() == 1 {
|
||||
return vec![
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I found that the first input is already an archive file, but you didn't provide any additional JSON files to append.".to_string()
|
||||
)
|
||||
.with_advice(
|
||||
"If you want to append to an archive, provide additional JSON files:\n\
|
||||
json-archive existing.json.archive new1.json new2.json"
|
||||
.to_string()
|
||||
)
|
||||
];
|
||||
}
|
||||
|
||||
return append_to_archive(&flags.inputs[0], &flags.inputs[1..], &output_path, flags.source.clone(), flags.snapshot_interval);
|
||||
}
|
||||
|
||||
println!("Creating archive: {}", output_path.display());
|
||||
println!("Input files: {:?}", flags.inputs);
|
||||
|
||||
if let Some(interval) = flags.snapshot_interval {
|
||||
println!("Snapshot interval: every {} observations", interval);
|
||||
}
|
||||
|
|
@ -137,16 +157,42 @@ fn create_archive(flags: &flags::Create) -> Vec<Diagnostic> {
|
|||
println!("Source: {}", source);
|
||||
}
|
||||
|
||||
match create_archive_from_files(
|
||||
&flags.inputs,
|
||||
output_path.clone(),
|
||||
// If destination exists and is an archive, append to it
|
||||
if Path::new(&parsed.destination).exists() {
|
||||
if let Ok(true) = is_json_archive(&parsed.destination) {
|
||||
println!("Appending to existing archive: {}", parsed.destination.display());
|
||||
println!("Input files: {:?}", parsed.input_files);
|
||||
|
||||
let diagnostics = append_to_archive(
|
||||
&parsed.destination,
|
||||
&parsed.input_files,
|
||||
&parsed.destination,
|
||||
flags.source.clone(),
|
||||
flags.snapshot_interval,
|
||||
);
|
||||
|
||||
if diagnostics.is_empty() {
|
||||
println!("Archive updated successfully: {}", parsed.destination.display());
|
||||
}
|
||||
|
||||
return diagnostics;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise create a new archive from the input files
|
||||
println!("Creating new archive: {}", parsed.destination.display());
|
||||
println!("Input files: {:?}", parsed.input_files);
|
||||
|
||||
let diagnostics = create_archive(
|
||||
&parsed.input_files,
|
||||
parsed.destination.clone(),
|
||||
flags.source.clone(),
|
||||
flags.snapshot_interval,
|
||||
) {
|
||||
Ok(()) => {
|
||||
println!("Archive created successfully: {}", output_path.display());
|
||||
Vec::new()
|
||||
}
|
||||
Err(diagnostics) => diagnostics,
|
||||
);
|
||||
|
||||
if diagnostics.is_empty() {
|
||||
println!("Archive created successfully: {}", parsed.destination.display());
|
||||
}
|
||||
|
||||
diagnostics
|
||||
}
|
||||
|
|
|
|||
238 src/pointer.rs
|
|
@ -19,7 +19,11 @@
|
|||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode};
|
||||
use crate::pointer_errors::{
|
||||
build_array_index_out_of_bounds_error, build_invalid_array_index_error,
|
||||
build_key_not_found_error, build_type_mismatch_error,
|
||||
};
|
||||
use serde_json::Value;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
|
|
@ -34,8 +38,7 @@ impl JsonPointer {
|
|||
}
|
||||
|
||||
if !path.starts_with('/') {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidPointerSyntax,
|
||||
format!(
|
||||
"I couldn't parse the path '{}': Path must start with '/'",
|
||||
|
|
@ -52,49 +55,52 @@ impl JsonPointer {
|
|||
Ok(JsonPointer { tokens })
|
||||
}
|
||||
|
||||
pub fn get<'a>(&self, value: &'a Value) -> Result<&'a Value, Diagnostic> {
|
||||
/// Traverse the JSON value following this pointer, returning a mutable reference.
|
||||
///
|
||||
/// Errors include rich context: the full path, which segment failed, the value
|
||||
/// at that point, and suggestions for typos. See `pointer_errors` module for details.
|
||||
pub fn get_mut<'a>(&self, value: &'a mut Value) -> Result<&'a mut Value, Diagnostic> {
|
||||
let mut current = value;
|
||||
|
||||
for token in &self.tokens {
|
||||
for (token_index, token) in self.tokens.iter().enumerate() {
|
||||
match current {
|
||||
Value::Object(obj) => {
|
||||
current = obj.get(token).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the key '{}'", token),
|
||||
)
|
||||
})?;
|
||||
if obj.contains_key(token) {
|
||||
current = obj.get_mut(token).unwrap();
|
||||
} else {
|
||||
let keys: Vec<String> = obj.keys().cloned().collect();
|
||||
let key_refs: Vec<&str> = keys.iter().map(|s| s.as_str()).collect();
|
||||
return Err(build_key_not_found_error(
|
||||
&self.tokens,
|
||||
token_index,
|
||||
token,
|
||||
&key_refs,
|
||||
));
|
||||
}
|
||||
}
|
||||
Value::Array(arr) => {
|
||||
let arr_len = arr.len();
|
||||
let index = token.parse::<usize>().map_err(|_| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidArrayIndex,
|
||||
format!("I couldn't parse '{}' as an array index", token),
|
||||
)
|
||||
})?;
|
||||
current = arr.get(index).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't find index {} (array length is {})",
|
||||
index,
|
||||
arr.len()
|
||||
),
|
||||
)
|
||||
build_invalid_array_index_error(&self.tokens, token_index, token, arr)
|
||||
})?;
|
||||
if index < arr_len {
|
||||
current = &mut arr[index];
|
||||
} else {
|
||||
return Err(build_array_index_out_of_bounds_error(
|
||||
&self.tokens,
|
||||
token_index,
|
||||
index,
|
||||
arr_len,
|
||||
arr,
|
||||
));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::TypeMismatch,
|
||||
format!(
|
||||
"I can't index into {} with '{}'",
|
||||
current.type_name(),
|
||||
token
|
||||
),
|
||||
return Err(build_type_mismatch_error(
|
||||
&self.tokens,
|
||||
token_index,
|
||||
token,
|
||||
current,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
|
@ -103,68 +109,32 @@ impl JsonPointer {
|
|||
Ok(current)
|
||||
}
|
||||
|
||||
/// Returns the parent pointer (all tokens except the last).
|
||||
///
|
||||
/// Used by `set` and `remove`: to modify a value, we need a mutable reference
|
||||
/// to its parent container (object or array), then operate on the final key/index.
|
||||
fn parent(&self) -> JsonPointer {
|
||||
JsonPointer {
|
||||
tokens: self.tokens[..self.tokens.len() - 1].to_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set(&self, value: &mut Value, new_value: Value) -> Result<(), Diagnostic> {
|
||||
if self.tokens.is_empty() {
|
||||
*value = new_value;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut current = value;
|
||||
let last_token = &self.tokens[self.tokens.len() - 1];
|
||||
let parent = self.parent().get_mut(value)?;
|
||||
|
||||
for token in &self.tokens[..self.tokens.len() - 1] {
|
||||
match current {
|
||||
Value::Object(obj) => {
|
||||
current = obj.get_mut(token).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the key '{}'", token),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
Value::Array(arr) => {
|
||||
let index = token.parse::<usize>().map_err(|_| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidArrayIndex,
|
||||
format!("I couldn't parse '{}' as an array index", token),
|
||||
)
|
||||
})?;
|
||||
let array_len = arr.len();
|
||||
current = arr.get_mut(index).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't find index {} (array length is {})",
|
||||
index, array_len
|
||||
),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
_ => {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::TypeMismatch,
|
||||
format!(
|
||||
"I can't index into {} with '{}'",
|
||||
current.type_name(),
|
||||
token
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match current {
|
||||
match parent {
|
||||
Value::Object(obj) => {
|
||||
obj.insert(last_token.clone(), new_value);
|
||||
}
|
||||
Value::Array(arr) => {
|
||||
let index = last_token.parse::<usize>().map_err(|_| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidArrayIndex,
|
||||
format!("I couldn't parse '{}' as an array index", last_token),
|
||||
)
|
||||
|
|
@ -175,8 +145,7 @@ impl JsonPointer {
|
|||
} else if index < arr.len() {
|
||||
arr[index] = new_value;
|
||||
} else {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't set index {} (array length is {})",
|
||||
|
|
@ -187,13 +156,12 @@ impl JsonPointer {
|
|||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::TypeMismatch,
|
||||
format!(
|
||||
"I can't set property '{}' on {}",
|
||||
last_token,
|
||||
current.type_name()
|
||||
parent.type_name()
|
||||
),
|
||||
));
|
||||
}
|
||||
|
|
@ -204,73 +172,25 @@ impl JsonPointer {
|
|||
|
||||
pub fn remove(&self, value: &mut Value) -> Result<Value, Diagnostic> {
|
||||
if self.tokens.is_empty() {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
return Err(Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidPointerSyntax,
|
||||
"I can't remove the root value".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut current = value;
|
||||
let last_token = &self.tokens[self.tokens.len() - 1];
|
||||
let parent = self.parent().get_mut(value)?;
|
||||
|
||||
for token in &self.tokens[..self.tokens.len() - 1] {
|
||||
match current {
|
||||
Value::Object(obj) => {
|
||||
current = obj.get_mut(token).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the key '{}'", token),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
Value::Array(arr) => {
|
||||
let index = token.parse::<usize>().map_err(|_| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidArrayIndex,
|
||||
format!("I couldn't parse '{}' as an array index", token),
|
||||
)
|
||||
})?;
|
||||
let array_len = arr.len();
|
||||
current = arr.get_mut(index).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't find index {} (array length is {})",
|
||||
index, array_len
|
||||
),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
_ => {
|
||||
return Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::TypeMismatch,
|
||||
format!(
|
||||
"I can't index into {} with '{}'",
|
||||
current.type_name(),
|
||||
token
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match current {
|
||||
match parent {
|
||||
Value::Object(obj) => obj.remove(last_token).ok_or_else(|| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the key '{}' to remove", last_token),
|
||||
)
|
||||
}),
|
||||
Value::Array(arr) => {
|
||||
let index = last_token.parse::<usize>().map_err(|_| {
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidArrayIndex,
|
||||
format!("I couldn't parse '{}' as an array index", last_token),
|
||||
)
|
||||
|
|
@ -279,8 +199,7 @@ impl JsonPointer {
|
|||
if index < arr.len() {
|
||||
Ok(arr.remove(index))
|
||||
} else {
|
||||
Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
Err(Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't remove index {} (array length is {})",
|
||||
|
|
@ -290,13 +209,12 @@ impl JsonPointer {
|
|||
))
|
||||
}
|
||||
}
|
||||
_ => Err(Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
_ => Err(Diagnostic::fatal(
|
||||
DiagnosticCode::TypeMismatch,
|
||||
format!(
|
||||
"I can't remove property '{}' from {}",
|
||||
last_token,
|
||||
current.type_name()
|
||||
parent.type_name()
|
||||
),
|
||||
)),
|
||||
}
|
||||
|
|
@ -342,40 +260,40 @@ mod tests {
|
|||
#[test]
|
||||
fn test_empty_pointer() {
|
||||
let pointer = JsonPointer::new("").unwrap();
|
||||
let value = json!({"foo": "bar"});
|
||||
assert_eq!(pointer.get(&value).unwrap(), &value);
|
||||
let mut value = json!({"foo": "bar"});
|
||||
assert_eq!(pointer.get_mut(&mut value).unwrap(), &json!({"foo": "bar"}));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_simple_object_access() {
|
||||
let pointer = JsonPointer::new("/foo").unwrap();
|
||||
let value = json!({"foo": "bar"});
|
||||
assert_eq!(pointer.get(&value).unwrap(), &json!("bar"));
|
||||
let mut value = json!({"foo": "bar"});
|
||||
assert_eq!(pointer.get_mut(&mut value).unwrap(), &json!("bar"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_nested_object_access() {
|
||||
let pointer = JsonPointer::new("/foo/bar").unwrap();
|
||||
let value = json!({"foo": {"bar": "baz"}});
|
||||
assert_eq!(pointer.get(&value).unwrap(), &json!("baz"));
|
||||
let mut value = json!({"foo": {"bar": "baz"}});
|
||||
assert_eq!(pointer.get_mut(&mut value).unwrap(), &json!("baz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_array_access() {
|
||||
let pointer = JsonPointer::new("/items/0").unwrap();
|
||||
let value = json!({"items": ["first", "second"]});
|
||||
assert_eq!(pointer.get(&value).unwrap(), &json!("first"));
|
||||
let mut value = json!({"items": ["first", "second"]});
|
||||
assert_eq!(pointer.get_mut(&mut value).unwrap(), &json!("first"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_escape_sequences() {
|
||||
let pointer = JsonPointer::new("/foo~1bar").unwrap();
|
||||
let value = json!({"foo/bar": "baz"});
|
||||
assert_eq!(pointer.get(&value).unwrap(), &json!("baz"));
|
||||
let mut value = json!({"foo/bar": "baz"});
|
||||
assert_eq!(pointer.get_mut(&mut value).unwrap(), &json!("baz"));
|
||||
|
||||
let pointer = JsonPointer::new("/foo~0bar").unwrap();
|
||||
let value = json!({"foo~bar": "baz"});
|
||||
assert_eq!(pointer.get(&value).unwrap(), &json!("baz"));
|
||||
let mut value = json!({"foo~bar": "baz"});
|
||||
assert_eq!(pointer.get_mut(&mut value).unwrap(), &json!("baz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
414 src/pointer_errors.rs (new file)
|
|
@ -0,0 +1,414 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Rich error reporting for JSON Pointer operations.
|
||||
//!
|
||||
//! # Why this module exists
|
||||
//!
|
||||
//! When a JSON Pointer operation fails (e.g., key not found, type mismatch),
|
||||
//! the user needs enough context to understand what went wrong. A message like
|
||||
//! "key 'emial' not found" isn't helpful without knowing:
|
||||
//!
|
||||
//! - What was the full path being traversed?
|
||||
//! - Which segment of the path failed?
|
||||
//! - What does the value at that point actually look like?
|
||||
//! - Did they maybe have a typo?
|
||||
//!
|
||||
//! This module builds diagnostic messages that answer these questions.
|
||||
//!
|
||||
//! # Why errors don't include filename/line number
|
||||
//!
|
||||
//! JsonPointer operates on JSON values extracted from a larger JSON Lines archive
|
||||
//! file. The pointer doesn't know which line of the archive the value came from.
|
||||
//! The caller (typically `reader.rs`) attaches location info via `.with_location()`:
|
||||
//!
|
||||
//! ```ignore
|
||||
//! if let Err(diag) = pointer.set(&mut state, value) {
|
||||
//! collector.add(diag.with_location(filename.clone(), line_number));
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! # Why set/remove navigate to parent first
|
||||
//!
|
||||
//! For `get_mut`, we traverse the entire path and return the value at the end.
|
||||
//!
|
||||
//! For `set` and `remove`, we need to modify a container (object or array), not
|
||||
//! the value itself. To insert a key into an object or remove an element from an
|
||||
//! array, we need a mutable reference to the parent container. So we:
|
||||
//!
|
||||
//! 1. Navigate to the parent of the target path (all tokens except the last)
|
||||
//! 2. Then operate on the last token against that parent
|
||||
//!
|
||||
//! This means errors can occur in two places:
|
||||
//! - During parent traversal (handled by `get_mut`'s error reporting)
|
||||
//! - When operating on the final token (e.g., index out of bounds on the array)
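//!
//! A minimal sketch of that two-step shape (illustrative only; the real
//! implementation lives in `pointer.rs`):
//!
//! ```ignore
//! // To set "/items/3", first walk to the parent container at "/items"...
//! let parent = JsonPointer::new("/items")?.get_mut(&mut state)?;
//! // ...then operate with the final token "3" directly on that container.
//! match parent {
//!     serde_json::Value::Array(arr) if 3 < arr.len() => arr[3] = new_value,
//!     _ => { /* build one of the diagnostics defined below */ }
//! }
//! ```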
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode};
|
||||
use serde_json::Value;
|
||||
use std::cmp::min;
|
||||
|
||||
const MAX_STRING_DISPLAY_LEN: usize = 50;
|
||||
|
||||
/// Format a JSON value compactly for error display.
|
||||
/// - Strings: truncated to MAX_STRING_DISPLAY_LEN chars
|
||||
/// - Objects: show keys with formatted values, nested objects as {...}
|
||||
/// - Arrays: show indices with formatted values, nested arrays as [...]
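///
/// For example, `{"name": "Alice", "tags": ["a", "b"]}` formats to two lines:
/// `"name": "Alice"` and `"tags": [...]` (one line per top-level key).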
|
||||
pub fn format_value_compact(value: &Value) -> Vec<String> {
|
||||
match value {
|
||||
Value::Null => vec!["null".to_string()],
|
||||
Value::Bool(b) => vec![b.to_string()],
|
||||
Value::Number(n) => vec![n.to_string()],
|
||||
Value::String(s) => {
|
||||
if s.chars().count() > MAX_STRING_DISPLAY_LEN {
let truncated: String = s.chars().take(MAX_STRING_DISPLAY_LEN).collect();
vec![format!("\"{}...\"", truncated)]
|
||||
} else {
|
||||
vec![format!("\"{}\"", s)]
|
||||
}
|
||||
}
|
||||
Value::Array(arr) => arr
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, v)| format!("{}: {}", i, format_value_inline(v)))
|
||||
.collect(),
|
||||
Value::Object(obj) => obj
|
||||
.iter()
|
||||
.map(|(k, v)| format!("\"{}\": {}", k, format_value_inline(v)))
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Format a value for inline display (single token).
|
||||
/// Nested structures become {...} or [...].
|
||||
fn format_value_inline(value: &Value) -> String {
|
||||
match value {
|
||||
Value::Null => "null".to_string(),
|
||||
Value::Bool(b) => b.to_string(),
|
||||
Value::Number(n) => n.to_string(),
|
||||
Value::String(s) => {
|
||||
            if s.chars().count() > MAX_STRING_DISPLAY_LEN {
                // Truncate on a char boundary so multi-byte UTF-8 strings can't panic
                let truncated: String = s.chars().take(MAX_STRING_DISPLAY_LEN).collect();
                format!("\"{}...\"", truncated)
|
||||
} else {
|
||||
format!("\"{}\"", s)
|
||||
}
|
||||
}
|
||||
Value::Array(_) => "[...]".to_string(),
|
||||
Value::Object(_) => "{...}".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Format the path with underline showing which segment failed.
|
||||
/// Returns (path_line, underline_line).
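///
/// For example, tokens `["user", "emial"]` with `failed_index = 1` render as:
///
/// ```text
/// /user/emial
///       ^^^^^
/// ```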
|
||||
pub fn format_path_with_underline(tokens: &[String], failed_index: usize) -> (String, String) {
|
||||
if tokens.is_empty() {
|
||||
return ("(root)".to_string(), "^^^^^^".to_string());
|
||||
}
|
||||
|
||||
let mut path = String::new();
|
||||
let mut underline = String::new();
|
||||
|
||||
for (i, token) in tokens.iter().enumerate() {
|
||||
let escaped = token.replace("~", "~0").replace("/", "~1");
|
||||
path.push('/');
|
||||
underline.push(' '); // space for the '/'
|
||||
|
||||
if i == failed_index {
|
||||
underline.push_str(&"^".repeat(escaped.len()));
|
||||
} else {
|
||||
underline.push_str(&" ".repeat(escaped.len()));
|
||||
}
|
||||
path.push_str(&escaped);
|
||||
}
|
||||
|
||||
(path, underline)
|
||||
}
|
||||
|
||||
/// Build the path string for tokens up to (but not including) the given index.
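/// e.g. tokens `["users", "0", "email"]` with `index = 2` yield `"/users/0"`.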
|
||||
pub fn path_up_to(tokens: &[String], index: usize) -> String {
|
||||
if index == 0 {
|
||||
return "(root)".to_string();
|
||||
}
|
||||
let prefix: Vec<String> = tokens[..index]
|
||||
.iter()
|
||||
.map(|t| t.replace("~", "~0").replace("/", "~1"))
|
||||
.collect();
|
||||
format!("/{}", prefix.join("/"))
|
||||
}
|
||||
|
||||
/// Calculate Levenshtein distance between two strings.
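/// e.g. "emial" vs "email" is distance 2 (two substitutions), so a transposed
/// typo still falls within the ≤ 2 threshold used by `find_similar_keys` below.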
|
||||
fn levenshtein_distance(a: &str, b: &str) -> usize {
|
||||
let a_chars: Vec<char> = a.chars().collect();
|
||||
let b_chars: Vec<char> = b.chars().collect();
|
||||
let a_len = a_chars.len();
|
||||
let b_len = b_chars.len();
|
||||
|
||||
if a_len == 0 {
|
||||
return b_len;
|
||||
}
|
||||
if b_len == 0 {
|
||||
return a_len;
|
||||
}
|
||||
|
||||
let mut prev_row: Vec<usize> = (0..=b_len).collect();
|
||||
let mut curr_row: Vec<usize> = vec![0; b_len + 1];
|
||||
|
||||
for i in 1..=a_len {
|
||||
curr_row[0] = i;
|
||||
for j in 1..=b_len {
|
||||
let cost = if a_chars[i - 1] == b_chars[j - 1] {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
};
|
||||
curr_row[j] = min(
|
||||
min(prev_row[j] + 1, curr_row[j - 1] + 1),
|
||||
prev_row[j - 1] + cost,
|
||||
);
|
||||
}
|
||||
std::mem::swap(&mut prev_row, &mut curr_row);
|
||||
}
|
||||
|
||||
prev_row[b_len]
|
||||
}
|
||||
|
||||
/// Find similar keys in a list (edit distance ≤ 2).
|
||||
pub fn find_similar_keys<'a>(target: &str, keys: &[&'a str]) -> Vec<&'a str> {
|
||||
keys.iter()
|
||||
.filter(|k| levenshtein_distance(target, k) <= 2)
|
||||
.copied()
|
||||
.collect()
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Error builders for get_mut traversal errors
|
||||
// =============================================================================
|
||||
|
||||
pub fn build_key_not_found_error(
|
||||
tokens: &[String],
|
||||
token_index: usize,
|
||||
token: &str,
|
||||
keys: &[&str],
|
||||
) -> Diagnostic {
|
||||
let full_path = tokens_to_path(tokens);
|
||||
let (path_line, underline) = format_path_with_underline(tokens, token_index);
|
||||
let parent_path = path_up_to(tokens, token_index);
|
||||
|
||||
let description = format!(
|
||||
"I was traversing the JSON path '{}' and got stuck.\n\n\
|
||||
I couldn't find the key '{}'.",
|
||||
full_path, token
|
||||
);
|
||||
|
||||
let mut snippet_lines = vec![format!(" {}", path_line), format!(" {}", underline)];
|
||||
|
||||
snippet_lines.push(String::new());
|
||||
snippet_lines.push(format!("Value at '{}':", parent_path));
|
||||
for key in keys {
|
||||
snippet_lines.push(format!("│ \"{}\": ...", key));
|
||||
}
|
||||
|
||||
let mut advice_parts = vec![format!("Available keys: {}", keys.join(", "))];
|
||||
|
||||
let similar = find_similar_keys(token, keys);
|
||||
if !similar.is_empty() {
|
||||
advice_parts.push(format!("Did you mean '{}'?", similar[0]));
|
||||
}
|
||||
|
||||
Diagnostic::fatal(DiagnosticCode::PathNotFound, description)
|
||||
.with_snippet(snippet_lines.join("\n"))
|
||||
.with_advice(advice_parts.join("\n"))
|
||||
}
|
||||
|
||||
pub fn build_invalid_array_index_error(
|
||||
tokens: &[String],
|
||||
token_index: usize,
|
||||
token: &str,
|
||||
arr: &[Value],
|
||||
) -> Diagnostic {
|
||||
let full_path = tokens_to_path(tokens);
|
||||
let (path_line, underline) = format_path_with_underline(tokens, token_index);
|
||||
let parent_path = path_up_to(tokens, token_index);
|
||||
|
||||
let description = format!(
|
||||
"I was traversing the JSON path '{}' and got stuck.\n\n\
|
||||
I couldn't parse '{}' as an array index.",
|
||||
full_path, token
|
||||
);
|
||||
|
||||
let mut snippet_lines = vec![format!(" {}", path_line), format!(" {}", underline)];
|
||||
|
||||
snippet_lines.push(String::new());
|
||||
snippet_lines.push(format!("Value at '{}':", parent_path));
|
||||
for line in format_value_compact(&Value::Array(arr.to_vec())) {
|
||||
snippet_lines.push(format!("│ {}", line));
|
||||
}
|
||||
|
||||
let advice = format!(
|
||||
"Array indices must be non-negative integers. Got '{}'.",
|
||||
token
|
||||
);
|
||||
|
||||
Diagnostic::fatal(DiagnosticCode::InvalidArrayIndex, description)
|
||||
.with_snippet(snippet_lines.join("\n"))
|
||||
.with_advice(advice)
|
||||
}
|
||||
|
||||
pub fn build_array_index_out_of_bounds_error(
|
||||
tokens: &[String],
|
||||
token_index: usize,
|
||||
index: usize,
|
||||
arr_len: usize,
|
||||
arr: &[Value],
|
||||
) -> Diagnostic {
|
||||
let full_path = tokens_to_path(tokens);
|
||||
let (path_line, underline) = format_path_with_underline(tokens, token_index);
|
||||
let parent_path = path_up_to(tokens, token_index);
|
||||
|
||||
let description = format!(
|
||||
"I was traversing the JSON path '{}' and got stuck.\n\n\
|
||||
I couldn't find index {} (array length is {}).",
|
||||
full_path, index, arr_len
|
||||
);
|
||||
|
||||
let mut snippet_lines = vec![format!(" {}", path_line), format!(" {}", underline)];
|
||||
|
||||
snippet_lines.push(String::new());
|
||||
snippet_lines.push(format!("Value at '{}':", parent_path));
|
||||
for line in format_value_compact(&Value::Array(arr.to_vec())) {
|
||||
snippet_lines.push(format!("│ {}", line));
|
||||
}
|
||||
|
||||
let advice = if arr_len == 0 {
|
||||
"The array is empty.".to_string()
|
||||
} else {
|
||||
format!("Valid indices are 0-{}.", arr_len - 1)
|
||||
};
|
||||
|
||||
Diagnostic::fatal(DiagnosticCode::PathNotFound, description)
|
||||
.with_snippet(snippet_lines.join("\n"))
|
||||
.with_advice(advice)
|
||||
}
|
||||
|
||||
pub fn build_type_mismatch_error(
|
||||
tokens: &[String],
|
||||
token_index: usize,
|
||||
token: &str,
|
||||
current: &Value,
|
||||
) -> Diagnostic {
|
||||
let full_path = tokens_to_path(tokens);
|
||||
let (path_line, underline) = format_path_with_underline(tokens, token_index);
|
||||
let parent_path = path_up_to(tokens, token_index);
|
||||
let type_name = value_type_name(current);
|
||||
|
||||
let description = format!(
|
||||
"I was traversing the JSON path '{}' and got stuck.\n\n\
|
||||
I can't index into {} with '{}'.",
|
||||
full_path, type_name, token
|
||||
);
|
||||
|
||||
let mut snippet_lines = vec![format!(" {}", path_line), format!(" {}", underline)];
|
||||
|
||||
snippet_lines.push(String::new());
|
||||
snippet_lines.push(format!("Value at '{}':", parent_path));
|
||||
for line in format_value_compact(current) {
|
||||
snippet_lines.push(format!("│ {}", line));
|
||||
}
|
||||
|
||||
let advice = if token.parse::<usize>().is_ok() {
|
||||
format!(
|
||||
"Array indices like '/{}' only work on arrays, not {}.",
|
||||
token, type_name
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"Object keys like '/{}' only work on objects, not {}.",
|
||||
token, type_name
|
||||
)
|
||||
};
|
||||
|
||||
Diagnostic::fatal(DiagnosticCode::TypeMismatch, description)
|
||||
.with_snippet(snippet_lines.join("\n"))
|
||||
.with_advice(advice)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Helpers
|
||||
// =============================================================================
|
||||
|
||||
fn tokens_to_path(tokens: &[String]) -> String {
|
||||
if tokens.is_empty() {
|
||||
return "".to_string();
|
||||
}
|
||||
let escaped: Vec<String> = tokens
|
||||
.iter()
|
||||
.map(|t| t.replace("~", "~0").replace("/", "~1"))
|
||||
.collect();
|
||||
format!("/{}", escaped.join("/"))
|
||||
}
|
||||
|
||||
fn value_type_name(value: &Value) -> &'static str {
|
||||
match value {
|
||||
Value::Null => "null",
|
||||
Value::Bool(_) => "boolean",
|
||||
Value::Number(_) => "number",
|
||||
Value::String(_) => "string",
|
||||
Value::Array(_) => "array",
|
||||
Value::Object(_) => "object",
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_key_not_found_error_output() {
|
||||
let tokens = vec!["user".to_string(), "emial".to_string()];
|
||||
let keys = vec!["name", "email", "age"];
|
||||
let diag = build_key_not_found_error(&tokens, 1, "emial", &keys);
|
||||
|
||||
println!("\n--- Key not found error ---");
|
||||
println!("{}", diag);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_type_mismatch_error_output() {
|
||||
let tokens = vec!["users".to_string(), "0".to_string(), "email".to_string(), "domain".to_string()];
|
||||
let current = Value::String("alice@example.com".to_string());
|
||||
let diag = build_type_mismatch_error(&tokens, 3, "domain", &current);
|
||||
|
||||
println!("\n--- Type mismatch error ---");
|
||||
println!("{}", diag);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_array_out_of_bounds_error_output() {
|
||||
let tokens = vec!["items".to_string(), "5".to_string()];
|
||||
let arr = vec![
|
||||
Value::String("apple".to_string()),
|
||||
Value::String("banana".to_string()),
|
||||
Value::String("cherry".to_string()),
|
||||
];
|
||||
let diag = build_array_index_out_of_bounds_error(&tokens, 1, 5, 3, &arr);
|
||||
|
||||
println!("\n--- Array out of bounds error ---");
|
||||
println!("{}", diag);
|
||||
}
|
||||
}
|
||||
64
tests/compressed_archive_tests.rs
Normal file
64
tests/compressed_archive_tests.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
// Integration tests for compressed archive functionality
|
||||
|
||||
use json_archive::{append_to_archive, ArchiveWriter, Header};
|
||||
use json_archive::{ArchiveReader, ReadMode};
|
||||
use serde_json::json;
|
||||
use std::io::Write;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "compression")]
|
||||
fn test_append_to_compressed_archive_basic() -> Result<(), Box<dyn std::error::Error>> {
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
|
||||
// Create initial archive
|
||||
let archive_file = NamedTempFile::with_suffix(".json.archive")?;
|
||||
let header = Header::new(json!({"count": 0}), Some("test".to_string()));
|
||||
|
||||
{
|
||||
let mut writer = ArchiveWriter::new(archive_file.path(), None)
|
||||
.map_err(|e| format!("Failed to create writer: {:?}", e))?;
|
||||
writer.write_header(&header)
|
||||
.map_err(|e| format!("Failed to write header: {:?}", e))?;
|
||||
writer.finish()
|
||||
.map_err(|e| format!("Failed to finish: {:?}", e))?;
|
||||
}
|
||||
|
||||
// Compress it
|
||||
let compressed_file = NamedTempFile::with_suffix(".json.archive.gz")?;
|
||||
{
|
||||
let input = std::fs::read(archive_file.path())?;
|
||||
let mut encoder = GzEncoder::new(
|
||||
compressed_file.as_file().try_clone()?,
|
||||
Compression::default()
|
||||
);
|
||||
encoder.write_all(&input)?;
|
||||
encoder.finish()?;
|
||||
}
|
||||
|
||||
// Create a new state file to append
|
||||
let mut state_file = NamedTempFile::new()?;
|
||||
writeln!(state_file, r#"{{"count": 1}}"#)?;
|
||||
state_file.flush()?;
|
||||
|
||||
// Append to compressed archive
|
||||
let diagnostics = append_to_archive(
|
||||
compressed_file.path(),
|
||||
&[state_file.path()],
|
||||
compressed_file.path(),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// Should succeed with no diagnostics
|
||||
assert!(diagnostics.is_empty(), "Got diagnostics: {:?}", diagnostics);
|
||||
|
||||
// Verify the updated archive reads back correctly (the reader handles decompression)
|
||||
let reader = ArchiveReader::new(compressed_file.path(), ReadMode::FullValidation)?;
|
||||
let result = reader.read(compressed_file.path())?;
|
||||
assert_eq!(result.final_state, json!({"count": 1}));
|
||||
assert_eq!(result.observation_count, 1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
78
tests/compression-integration/README.md
Normal file
78
tests/compression-integration/README.md
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
# Compression Integration Tests
|
||||
|
||||
Manual integration tests for compressed archive functionality.
|
||||
|
||||
These scripts exercise the tool's ability to:
|
||||
1. Read archives that were compressed by external programs (gzip, brotli, zstd)
|
||||
2. Append new observations to compressed archives
|
||||
3. Produce correct results whether reading compressed or uncompressed
|
||||
|
||||
## Scripts
|
||||
|
||||
### `generate_state.py <n>`
|
||||
Generates a JSON state file with `n` items in each array. Output goes to stdout.
|
||||
|
||||
```bash
|
||||
./generate_state.py 3
|
||||
# Output: {"colors": ["color_1", "color_2", "color_3"], "numbers": ["number_1", "number_2", "number_3"], "animals": ["animal_1", "animal_2", "animal_3"]}
|
||||
```
|
||||
|
||||
### `generate_state_files.py <count> <output_dir>`
|
||||
Generates a series of state files (state_1.json through state_N.json) with progressively more items.
|
||||
|
||||
```bash
|
||||
./generate_state_files.py 9 ./data
|
||||
# Creates: data/state_1.json, data/state_2.json, ... data/state_9.json
|
||||
```
|
||||
|
||||
### `run_gzip_test.sh`
|
||||
Tests the gzip compression workflow:
|
||||
1. Create archive from first state file
|
||||
2. Compress with gzip
|
||||
3. Append remaining 8 state files to the compressed archive
|
||||
4. Decompress and inspect
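
The append step in these scripts is the tool's ordinary invocation, just pointed at the
compressed file (paths below are illustrative; see the script for the exact ones):

```bash
./target/debug/json-archive out/gzip/test.json.archive.gz data/state_2.json
```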
|
||||
|
||||
### `run_brotli_test.sh`
|
||||
Same workflow but with brotli compression.
|
||||
|
||||
### `run_zstd_test.sh`
|
||||
Same workflow but with zstd compression.
|
||||
|
||||
### `run_all.sh`
|
||||
Runs all compression tests in sequence.
|
||||
|
||||
### `validate.sh` (optional)
|
||||
Smoke test to verify the final state matches expectations.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
cd tests/compression-integration
|
||||
|
||||
# Run all tests (generates data, builds, runs all compression formats)
|
||||
./run_all.sh
|
||||
|
||||
# Or run individual steps:
|
||||
./generate_state_files.py 9 ./data
|
||||
./run_gzip_test.sh
|
||||
./run_brotli_test.sh
|
||||
./run_zstd_test.sh
|
||||
|
||||
# Optional: validate outputs match
|
||||
./validate.sh
|
||||
```
|
||||
|
||||
## What to look for
|
||||
|
||||
After running the tests, you can manually verify:
|
||||
|
||||
1. The compressed archives were created
|
||||
2. Appending to compressed archives worked (check file sizes grew)
|
||||
3. The `info` command shows the same observation count for compressed and decompressed versions
|
||||
4. The `state` command returns the same final state
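
For example (from the repository root; paths assume the default `out/` layout created by `run_all.sh`):

```bash
./target/debug/json-archive info tests/compression-integration/out/gzip/test.json.archive.gz
./target/debug/json-archive info tests/compression-integration/out/gzip/test.json.archive
./target/debug/json-archive state tests/compression-integration/out/gzip/test.json.archive.gz
```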
|
||||
|
||||
## Dependencies
|
||||
|
||||
- gzip (usually pre-installed)
|
||||
- brotli (`brew install brotli`)
|
||||
- zstd (`brew install zstd`)
|
||||
28
tests/compression-integration/generate_state.py
Executable file
28
tests/compression-integration/generate_state.py
Executable file
|
|
@ -0,0 +1,28 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate a JSON state file with N items in each array.
|
||||
Output goes to stdout.
|
||||
|
||||
Usage: ./generate_state.py <n>
|
||||
"""
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: generate_state.py <n>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
n = int(sys.argv[1])
|
||||
|
||||
state = {
|
||||
"colors": [f"color_{i}" for i in range(1, n + 1)],
|
||||
"numbers": [f"number_{i}" for i in range(1, n + 1)],
|
||||
"animals": [f"animal_{i}" for i in range(1, n + 1)],
|
||||
}
|
||||
|
||||
print(json.dumps(state))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
39
tests/compression-integration/generate_state_files.py
Executable file
39
tests/compression-integration/generate_state_files.py
Executable file
|
|
@ -0,0 +1,39 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate a series of state files with progressively more items.
|
||||
|
||||
Usage: ./generate_state_files.py <count> <output_dir>
|
||||
|
||||
Creates: output_dir/state_1.json, state_2.json, ..., state_N.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
def generate_state(n):
|
||||
return {
|
||||
"colors": [f"color_{i}" for i in range(1, n + 1)],
|
||||
"numbers": [f"number_{i}" for i in range(1, n + 1)],
|
||||
"animals": [f"animal_{i}" for i in range(1, n + 1)],
|
||||
}
|
||||
|
||||
def main():
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: generate_state_files.py <count> <output_dir>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
count = int(sys.argv[1])
|
||||
output_dir = sys.argv[2]
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
for i in range(1, count + 1):
|
||||
state = generate_state(i)
|
||||
path = os.path.join(output_dir, f"state_{i}.json")
|
||||
with open(path, "w") as f:
|
||||
json.dump(state, f)
|
||||
print(f"Created {path}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
33
tests/compression-integration/run_all.sh
Executable file
33
tests/compression-integration/run_all.sh
Executable file
|
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Run all compression integration tests.
|
||||
#
|
||||
# Usage: ./run_all.sh
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")"
|
||||
|
||||
echo "=== Building json-archive with compression support ==="
|
||||
cd "$PROJECT_DIR"
|
||||
cargo build --features compression
|
||||
|
||||
echo ""
|
||||
echo "=== Generating test data ==="
|
||||
cd "$SCRIPT_DIR"
|
||||
python3 generate_state_files.py 9 ./data
|
||||
|
||||
echo ""
|
||||
"$SCRIPT_DIR/run_gzip_test.sh"
|
||||
|
||||
echo ""
|
||||
"$SCRIPT_DIR/run_brotli_test.sh"
|
||||
|
||||
echo ""
|
||||
"$SCRIPT_DIR/run_zstd_test.sh"
|
||||
|
||||
echo ""
|
||||
echo "=== All tests complete ==="
|
||||
echo "Output files are in: $SCRIPT_DIR/out/"
|
||||
55
tests/compression-integration/run_brotli_test.sh
Executable file
55
tests/compression-integration/run_brotli_test.sh
Executable file
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Test brotli compression workflow:
|
||||
# 1. Create archive from first state file
|
||||
# 2. Compress with brotli
|
||||
# 3. Append remaining state files to the compressed archive
|
||||
# 4. Decompress and show info
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")"
|
||||
BINARY="$PROJECT_DIR/target/debug/json-archive"
|
||||
DATA_DIR="$SCRIPT_DIR/data"
|
||||
OUT_DIR="$SCRIPT_DIR/out/brotli"
|
||||
|
||||
echo "=== Brotli Compression Test ==="
|
||||
|
||||
# Setup
|
||||
rm -rf "$OUT_DIR"
|
||||
mkdir -p "$OUT_DIR"
|
||||
|
||||
# Create initial archive from first state file
|
||||
echo "Creating archive from state_1.json..."
|
||||
"$BINARY" "$DATA_DIR/state_1.json" -o "$OUT_DIR/test.json.archive"
|
||||
|
||||
# Compress with brotli
|
||||
echo "Compressing with brotli..."
|
||||
brotli "$OUT_DIR/test.json.archive"
|
||||
ls -la "$OUT_DIR/"
|
||||
|
||||
# Append remaining files to compressed archive
|
||||
for i in $(seq 2 9); do
|
||||
echo "Appending state_$i.json to compressed archive..."
|
||||
"$BINARY" "$OUT_DIR/test.json.archive.br" "$DATA_DIR/state_$i.json"
|
||||
done
|
||||
|
||||
# Show info on the result
|
||||
echo ""
|
||||
echo "Final archive info:"
|
||||
"$BINARY" info "$OUT_DIR/test.json.archive.br"
|
||||
|
||||
# Decompress for manual inspection
|
||||
echo ""
|
||||
echo "Decompressing for comparison..."
|
||||
brotli -d -k "$OUT_DIR/test.json.archive.br"
|
||||
|
||||
echo ""
|
||||
echo "Decompressed archive info:"
|
||||
"$BINARY" info "$OUT_DIR/test.json.archive"
|
||||
|
||||
echo ""
|
||||
echo "Files in $OUT_DIR:"
|
||||
ls -la "$OUT_DIR/"
|
||||
55
tests/compression-integration/run_gzip_test.sh
Executable file
55
tests/compression-integration/run_gzip_test.sh
Executable file
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Test gzip compression workflow:
|
||||
# 1. Create archive from first state file
|
||||
# 2. Compress with gzip
|
||||
# 3. Append remaining state files to the compressed archive
|
||||
# 4. Decompress and show info
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")"
|
||||
BINARY="$PROJECT_DIR/target/debug/json-archive"
|
||||
DATA_DIR="$SCRIPT_DIR/data"
|
||||
OUT_DIR="$SCRIPT_DIR/out/gzip"
|
||||
|
||||
echo "=== Gzip Compression Test ==="
|
||||
|
||||
# Setup
|
||||
rm -rf "$OUT_DIR"
|
||||
mkdir -p "$OUT_DIR"
|
||||
|
||||
# Create initial archive from first state file
|
||||
echo "Creating archive from state_1.json..."
|
||||
"$BINARY" "$DATA_DIR/state_1.json" -o "$OUT_DIR/test.json.archive"
|
||||
|
||||
# Compress with gzip
|
||||
echo "Compressing with gzip..."
|
||||
gzip "$OUT_DIR/test.json.archive"
|
||||
ls -la "$OUT_DIR/"
|
||||
|
||||
# Append remaining files to compressed archive
|
||||
for i in $(seq 2 9); do
|
||||
echo "Appending state_$i.json to compressed archive..."
|
||||
"$BINARY" "$OUT_DIR/test.json.archive.gz" "$DATA_DIR/state_$i.json"
|
||||
done
|
||||
|
||||
# Show info on the result
|
||||
echo ""
|
||||
echo "Final archive info:"
|
||||
"$BINARY" info "$OUT_DIR/test.json.archive.gz"
|
||||
|
||||
# Decompress for manual inspection
|
||||
echo ""
|
||||
echo "Decompressing for comparison..."
|
||||
gunzip -k "$OUT_DIR/test.json.archive.gz" 2>/dev/null || gunzip -c "$OUT_DIR/test.json.archive.gz" > "$OUT_DIR/test.json.archive"
|
||||
|
||||
echo ""
|
||||
echo "Decompressed archive info:"
|
||||
"$BINARY" info "$OUT_DIR/test.json.archive"
|
||||
|
||||
echo ""
|
||||
echo "Files in $OUT_DIR:"
|
||||
ls -la "$OUT_DIR/"
|
||||
56
tests/compression-integration/run_zstd_test.sh
Executable file
56
tests/compression-integration/run_zstd_test.sh
Executable file
|
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Test zstd compression workflow:
|
||||
# 1. Create archive from first state file
|
||||
# 2. Compress with zstd
|
||||
# 3. Append remaining state files to the compressed archive
|
||||
# 4. Decompress and show info
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")"
|
||||
BINARY="$PROJECT_DIR/target/debug/json-archive"
|
||||
DATA_DIR="$SCRIPT_DIR/data"
|
||||
OUT_DIR="$SCRIPT_DIR/out/zstd"
|
||||
|
||||
echo "=== Zstd Compression Test ==="
|
||||
|
||||
# Setup
|
||||
rm -rf "$OUT_DIR"
|
||||
mkdir -p "$OUT_DIR"
|
||||
|
||||
# Create initial archive from first state file
|
||||
echo "Creating archive from state_1.json..."
|
||||
"$BINARY" "$DATA_DIR/state_1.json" -o "$OUT_DIR/test.json.archive"
|
||||
|
||||
# Compress with zstd
|
||||
echo "Compressing with zstd..."
|
||||
zstd "$OUT_DIR/test.json.archive"
|
||||
rm "$OUT_DIR/test.json.archive"
|
||||
ls -la "$OUT_DIR/"
|
||||
|
||||
# Append remaining files to compressed archive
|
||||
for i in $(seq 2 9); do
|
||||
echo "Appending state_$i.json to compressed archive..."
|
||||
"$BINARY" "$OUT_DIR/test.json.archive.zst" "$DATA_DIR/state_$i.json"
|
||||
done
|
||||
|
||||
# Show info on the result
|
||||
echo ""
|
||||
echo "Final archive info:"
|
||||
"$BINARY" info "$OUT_DIR/test.json.archive.zst"
|
||||
|
||||
# Decompress for manual inspection
|
||||
echo ""
|
||||
echo "Decompressing for comparison..."
|
||||
zstd -d -k "$OUT_DIR/test.json.archive.zst"
|
||||
|
||||
echo ""
|
||||
echo "Decompressed archive info:"
|
||||
"$BINARY" info "$OUT_DIR/test.json.archive"
|
||||
|
||||
echo ""
|
||||
echo "Files in $OUT_DIR:"
|
||||
ls -la "$OUT_DIR/"
|
||||
63
tests/compression-integration/validate.sh
Executable file
63
tests/compression-integration/validate.sh
Executable file
|
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# Validate that compressed and decompressed archives produce the same results.
|
||||
# Run this after run_all.sh to smoke test the outputs.
|
||||
#
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_DIR="$(dirname "$(dirname "$SCRIPT_DIR")")"
|
||||
BINARY="$PROJECT_DIR/target/debug/json-archive"
|
||||
|
||||
echo "=== Validation ==="
|
||||
|
||||
errors=0
|
||||
|
||||
for format in gzip brotli zstd; do
|
||||
dir="$SCRIPT_DIR/out/$format"
|
||||
|
||||
if [ ! -d "$dir" ]; then
|
||||
echo "SKIP: $format (no output directory)"
|
||||
continue
|
||||
fi
|
||||
|
||||
# Find the compressed and uncompressed files
|
||||
compressed=$(find "$dir" -name "*.gz" -o -name "*.br" -o -name "*.zst" | head -1)
|
||||
uncompressed="$dir/test.json.archive"
|
||||
|
||||
if [ ! -f "$compressed" ] || [ ! -f "$uncompressed" ]; then
|
||||
echo "SKIP: $format (missing files)"
|
||||
continue
|
||||
fi
|
||||
|
||||
# Compare state output
|
||||
state_compressed=$("$BINARY" state "$compressed")
|
||||
state_uncompressed=$("$BINARY" state "$uncompressed")
|
||||
|
||||
if [ "$state_compressed" = "$state_uncompressed" ]; then
|
||||
echo "OK: $format - state matches"
|
||||
else
|
||||
echo "FAIL: $format - state differs"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
|
||||
# Compare observation count from info
|
||||
count_compressed=$("$BINARY" info "$compressed" --output json | python3 -c "import sys,json; print(json.load(sys.stdin)['observation_count'])")
|
||||
count_uncompressed=$("$BINARY" info "$uncompressed" --output json | python3 -c "import sys,json; print(json.load(sys.stdin)['observation_count'])")
|
||||
|
||||
if [ "$count_compressed" = "$count_uncompressed" ]; then
|
||||
echo "OK: $format - observation count matches ($count_compressed)"
|
||||
else
|
||||
echo "FAIL: $format - observation count differs ($count_compressed vs $count_uncompressed)"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
if [ $errors -eq 0 ]; then
|
||||
echo "All validations passed."
|
||||
else
|
||||
echo "$errors validation(s) failed."
|
||||
exit 1
|
||||
fi
|
||||