refactor: decompose archive read/write into composable building blocks

Delete archive_context.rs and archive_ops.rs (1200+ lines of duplicated
logic). Replace with four focused modules:

1. open_archive() - opens a file, detects compression, returns raw bytes
2. read_archive() - parses bytes into validated observations
3. CompressionWriter - writes bytes with any compression format
4. WriteStrategy - given a list of files, determines input archive,
 output archive, output format, and which of four write modes to use:
  - Create: new archive, no input
  - Append: uncompressed input, seek to end
  - AtomicSwap: compressed input, rewrite via temp file
  - CopyOnWrite: different input/output paths, transcode between formats

Previously you could not specify the output format: appending always
preserved the input format, and creating compressed archives didn't work.
Now all four cases work with any supported compression format.

Atomic swap now writes to a temp file, then renames it, which makes the rewrite crash-safe.

Trade-off: This approach prioritizes code clarity over syscall efficiency.
  The archive file may be opened and read multiple times during a single
  operation (once for format detection, once for reading state, once for
  copying content). A more optimized implementation could reuse file
  handles, but the current approach makes each step's purpose obvious.
This commit is contained in:
nobody 2025-12-01 20:51:50 -08:00
commit b65103c9f7
Signed by: GrocerPublishAgent
GPG key ID: 43B1C298CDDE181C
28 changed files with 2055 additions and 2859 deletions

View file

@ -1,8 +1,8 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use json_archive::{ArchiveReader, ReadMode};
use std::io::Write;
use json_archive::{read_archive, ReadMode};
use std::io::{BufReader, Write};
use tempfile::NamedTempFile;
fn create_archive_content(data: &[u8]) -> Vec<u8> {
@ -80,24 +80,25 @@ fn create_archive_content(data: &[u8]) -> Vec<u8> {
fuzz_target!(|data: &[u8]| {
let archive_content = create_archive_content(data);
if let Ok(mut temp_file) = NamedTempFile::new() {
if temp_file.write_all(&archive_content).is_ok() {
// Test both validation modes
for mode in [ReadMode::FullValidation, ReadMode::AppendSeek] {
if let Ok(reader) = ArchiveReader::new(temp_file.path(), mode) {
let result = reader.read(temp_file.path());
if let Ok(file) = std::fs::File::open(temp_file.path()) {
let reader = BufReader::new(file);
let result = read_archive(reader, &temp_file.path().display().to_string(), mode);
// Should never panic, regardless of input malformation
match result {
Ok(read_result) => {
// Basic invariants that should hold for any successful parse
let _ = &read_result.final_state;
let _ = &read_result.diagnostics;
// Observation count should be reasonable
assert!(read_result.observation_count < 100000);
// If we have diagnostics, they should be well-formed
for diagnostic in read_result.diagnostics.diagnostics() {
assert!(!diagnostic.description.is_empty());

View file

@ -1,8 +1,8 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use json_archive::{ArchiveReader, ReadMode};
use std::io::Write;
use json_archive::{read_archive, ReadMode};
use std::io::{BufReader, Write};
use tempfile::NamedTempFile;
fuzz_target!(|data: &[u8]| {
@ -11,10 +11,11 @@ fuzz_target!(|data: &[u8]| {
if temp_file.write_all(data).is_ok() {
// Try to read the file with both validation modes
for mode in [ReadMode::FullValidation, ReadMode::AppendSeek] {
if let Ok(reader) = ArchiveReader::new(temp_file.path(), mode) {
if let Ok(file) = std::fs::File::open(temp_file.path()) {
let reader = BufReader::new(file);
// The read operation should never panic, regardless of input
// It should either succeed or return an error gracefully
let _ = reader.read(temp_file.path());
let _ = read_archive(reader, &temp_file.path().display().to_string(), mode);
}
}
}

View file

@ -2,8 +2,8 @@
use libfuzzer_sys::fuzz_target;
use arbitrary::{Arbitrary, Unstructured};
use json_archive::{ArchiveReader, ReadMode};
use std::io::Write;
use json_archive::{read_archive, ReadMode};
use std::io::{BufReader, Write};
use tempfile::NamedTempFile;
use serde_json::{json, Value};
@ -160,20 +160,21 @@ fuzz_target!(|data: &[u8]| {
let mut u = Unstructured::new(data);
if let Ok(archive) = FuzzArchive::arbitrary(&mut u) {
let content = archive.generate_archive();
if let Ok(mut temp_file) = NamedTempFile::new() {
if temp_file.write_all(content.as_bytes()).is_ok() {
// Test both validation modes
for mode in [ReadMode::FullValidation, ReadMode::AppendSeek] {
if let Ok(reader) = ArchiveReader::new(temp_file.path(), mode) {
let result = reader.read(temp_file.path());
if let Ok(file) = std::fs::File::open(temp_file.path()) {
let reader = BufReader::new(file);
let result = read_archive(reader, &temp_file.path().display().to_string(), mode);
// The operation should never panic
// Verify that diagnostics are properly generated for invalid structures
if let Ok(read_result) = result {
// Basic sanity checks on the result
assert!(read_result.observation_count < 10000); // Reasonable upper bound
// If there are fatal diagnostics, final state should be reasonable
if read_result.diagnostics.has_fatal() {
// Should still have some state (at least initial or null)