refactor: decompose archive read/write into composable building blocks
Delete archive_context.rs and archive_ops.rs (1200+ lines of duplicated logic). Replace them with four focused modules:

1. open_archive() - opens a file, detects compression, returns raw bytes
2. read_archive() - parses bytes into validated observations
3. CompressionWriter - writes bytes with any compression format
4. WriteStrategy - given a list of files, determines the input archive, output archive, output format, and which of four write modes to use:
   - Create: new archive, no input
   - Append: uncompressed input, seek to end
   - AtomicSwap: compressed input, rewrite via temp file
   - CopyOnWrite: different input/output paths, transcode between formats

Previously there was no way to specify the output format: appending always preserved the input format, and creating compressed archives didn't work. Now all four cases work with any supported compression format. Atomic swap writes to a temp file and then renames it over the original, so it is crash-safe.

Trade-off: this approach prioritizes code clarity over syscall efficiency. The archive file may be opened and read multiple times during a single operation (once for format detection, once for reading state, once for copying content). A more optimized implementation could reuse file handles, but the current approach makes each step's purpose obvious.
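For orientation, a minimal sketch of the mode selection the fourth module performs. The four WriteMode variants and their meanings come from the list above, but the function signature, the CompressionFormat variants, and the exact decision order are illustrative assumptions, not the actual WriteStrategy API:

    use std::path::PathBuf;

    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    enum CompressionFormat {
        None,
        Gzip,
        Zstd,
    }

    enum WriteMode {
        Create,      // new archive, no input
        Append,      // uncompressed input, seek to end
        AtomicSwap,  // compressed input, rewrite via temp file
        CopyOnWrite, // different input/output paths, transcode between formats
    }

    // Hypothetical helper: decide which of the four write modes applies.
    fn choose_write_mode(
        input: Option<(&PathBuf, CompressionFormat)>,
        output: &PathBuf,
        output_format: CompressionFormat,
    ) -> WriteMode {
        match input {
            // No existing archive to extend: create a new one.
            None => WriteMode::Create,
            Some((input_path, input_format)) => {
                if input_path != output {
                    // Reading one archive and writing another, possibly in a
                    // different compression format: copy-on-write / transcode.
                    WriteMode::CopyOnWrite
                } else if input_format == CompressionFormat::None
                    && output_format == CompressionFormat::None
                {
                    // Same uncompressed file, format unchanged: seek to the
                    // end and append in place.
                    WriteMode::Append
                } else {
                    // Same file but compressed (or the format is changing):
                    // rewrite through a temp file, then rename over the
                    // original so a crash never leaves a corrupt archive.
                    WriteMode::AtomicSwap
                }
            }
        }
    }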
parent a760114ffe
commit b65103c9f7
28 changed files with 2055 additions and 2859 deletions
Cargo.lock (generated): 2 changes
@@ -234,7 +234,7 @@ dependencies = [
 [[package]]
 name = "json-archive"
-version = "0.99.0"
+version = "0.99.1"
 dependencies = [
  "arbitrary",
  "brotli",
Cargo.toml
@@ -1,8 +1,9 @@
 [package]
 name = "json-archive"
-version = "0.99.0"
+version = "0.99.1"
 edition = "2021"
 authors = ["Karl <marxism@peoplesgrocers.com>", "nobody <nobody@localhost>"]
 homepage = "https://peoplesgrocers.com/code/oss/json-archive"
 repository = "https://peoplesgrocers.com/code/oss/json-archive"
 license = "AGPL-3.0"
+description = "CLI tool for tracking JSON file changes over time using delta-based archives"
@@ -1,8 +1,8 @@
 #![no_main]

 use libfuzzer_sys::fuzz_target;
-use json_archive::{ArchiveReader, ReadMode};
-use std::io::Write;
+use json_archive::{read_archive, ReadMode};
+use std::io::{BufReader, Write};
 use tempfile::NamedTempFile;

 fn create_archive_content(data: &[u8]) -> Vec<u8> {
@@ -80,24 +80,25 @@ fn create_archive_content(data: &[u8]) -> Vec<u8> {

 fuzz_target!(|data: &[u8]| {
     let archive_content = create_archive_content(data);

     if let Ok(mut temp_file) = NamedTempFile::new() {
         if temp_file.write_all(&archive_content).is_ok() {
             // Test both validation modes
             for mode in [ReadMode::FullValidation, ReadMode::AppendSeek] {
-                if let Ok(reader) = ArchiveReader::new(temp_file.path(), mode) {
-                    let result = reader.read(temp_file.path());
+                if let Ok(file) = std::fs::File::open(temp_file.path()) {
+                    let reader = BufReader::new(file);
+                    let result = read_archive(reader, &temp_file.path().display().to_string(), mode);

                     // Should never panic, regardless of input malformation
                     match result {
                         Ok(read_result) => {
                             // Basic invariants that should hold for any successful parse
                             let _ = &read_result.final_state;
                             let _ = &read_result.diagnostics;

                             // Observation count should be reasonable
                             assert!(read_result.observation_count < 100000);

                             // If we have diagnostics, they should be well-formed
                             for diagnostic in read_result.diagnostics.diagnostics() {
                                 assert!(!diagnostic.description.is_empty());
@@ -1,8 +1,8 @@
 #![no_main]

 use libfuzzer_sys::fuzz_target;
-use json_archive::{ArchiveReader, ReadMode};
-use std::io::Write;
+use json_archive::{read_archive, ReadMode};
+use std::io::{BufReader, Write};
 use tempfile::NamedTempFile;

 fuzz_target!(|data: &[u8]| {
@@ -11,10 +11,11 @@ fuzz_target!(|data: &[u8]| {
         if temp_file.write_all(data).is_ok() {
             // Try to read the file with both validation modes
             for mode in [ReadMode::FullValidation, ReadMode::AppendSeek] {
-                if let Ok(reader) = ArchiveReader::new(temp_file.path(), mode) {
+                if let Ok(file) = std::fs::File::open(temp_file.path()) {
+                    let reader = BufReader::new(file);
                     // The read operation should never panic, regardless of input
                     // It should either succeed or return an error gracefully
-                    let _ = reader.read(temp_file.path());
+                    let _ = read_archive(reader, &temp_file.path().display().to_string(), mode);
                 }
             }
         }
@@ -2,8 +2,8 @@

 use libfuzzer_sys::fuzz_target;
 use arbitrary::{Arbitrary, Unstructured};
-use json_archive::{ArchiveReader, ReadMode};
-use std::io::Write;
+use json_archive::{read_archive, ReadMode};
+use std::io::{BufReader, Write};
 use tempfile::NamedTempFile;
 use serde_json::{json, Value};

@@ -160,20 +160,21 @@ fuzz_target!(|data: &[u8]| {
     let mut u = Unstructured::new(data);
     if let Ok(archive) = FuzzArchive::arbitrary(&mut u) {
         let content = archive.generate_archive();

         if let Ok(mut temp_file) = NamedTempFile::new() {
             if temp_file.write_all(content.as_bytes()).is_ok() {
                 // Test both validation modes
                 for mode in [ReadMode::FullValidation, ReadMode::AppendSeek] {
-                    if let Ok(reader) = ArchiveReader::new(temp_file.path(), mode) {
-                        let result = reader.read(temp_file.path());
+                    if let Ok(file) = std::fs::File::open(temp_file.path()) {
+                        let reader = BufReader::new(file);
+                        let result = read_archive(reader, &temp_file.path().display().to_string(), mode);

                         // The operation should never panic
                         // Verify that diagnostics are properly generated for invalid structures
                         if let Ok(read_result) = result {
                             // Basic sanity checks on the result
                             assert!(read_result.observation_count < 10000); // Reasonable upper bound

                             // If there are fatal diagnostics, final state should be reasonable
                             if read_result.diagnostics.has_fatal() {
                                 // Should still have some state (at least initial or null)
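Read together, the new API in these fuzz targets composes with open_archive from the commit message. Below is a hedged sketch of that pipeline, assuming open_archive transparently decompresses and exposes the byte stream through an OpenedArchive reader field (as the deleted archive_ops.rs further down does), and that read_archive accepts any buffered reader; the error handling here is collapsed to Option for brevity and is not the crate's real API surface:

    use std::io::BufReader;
    use std::path::Path;

    // Sketch only: exact return and error types are simplified.
    fn final_state_of(path: &Path) -> Option<serde_json::Value> {
        // 1. Open the file; compression is detected from magic bytes and the
        //    returned reader yields decompressed bytes.
        let opened = json_archive::archive_open::open_archive(path).ok()?;

        // 2. Parse the byte stream into validated observations and replay
        //    them into a final state.
        let reader = BufReader::new(opened.reader);
        let result = json_archive::read_archive(
            reader,
            &path.display().to_string(),
            json_archive::ReadMode::FullValidation,
        )
        .ok()?;

        Some(result.final_state)
    }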
@ -1,595 +0,0 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Archive write context and shared observation writing logic.
|
||||
//!
|
||||
//! This module provides:
|
||||
//! - `WriteContext`: A struct that holds the state needed to write observations
|
||||
//! - `write_observations`: The shared logic for diffing JSON files and writing events
|
||||
//!
|
||||
//! The key insight is that both create and append operations share the same
|
||||
//! core logic once they've set up their initial state and writer.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde_json::Value;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::atomic_file::atomic_replace_file;
|
||||
use crate::detection::CompressionFormat;
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector};
|
||||
use crate::diff;
|
||||
use crate::events::{Event, Observation};
|
||||
|
||||
/// Strategy for finishing the write operation.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FinishStrategy {
|
||||
/// Just flush the writer. Used for:
|
||||
/// - Creating new archives
|
||||
/// - Appending to uncompressed archives (same file)
|
||||
FlushOnly,
|
||||
|
||||
/// Atomic replace: swap temp file with original. Used for:
|
||||
/// - Appending to compressed archives (rewrite strategy)
|
||||
AtomicReplace {
|
||||
temp_path: PathBuf,
|
||||
output_path: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
/// Context for writing observations to an archive.
|
||||
///
|
||||
/// This struct is the result of the "setup phase" for both create and append
|
||||
/// operations. Once you have a WriteContext, you can use `write_observations`
|
||||
/// to add new states, then call `finish` to complete the operation.
|
||||
pub struct WriteContext<W: Write> {
|
||||
/// The writer to output JSON lines to.
|
||||
pub writer: W,
|
||||
|
||||
/// Current state of the archive (used for diffing).
|
||||
pub current_state: Value,
|
||||
|
||||
/// Number of observations already in the archive.
|
||||
pub observation_count: usize,
|
||||
|
||||
/// Optional interval for writing snapshots.
|
||||
pub snapshot_interval: Option<usize>,
|
||||
|
||||
/// How to finish the write operation.
|
||||
pub finish_strategy: FinishStrategy,
|
||||
|
||||
/// Diagnostics collected during setup (e.g., warnings from reading existing archive).
|
||||
pub diagnostics: DiagnosticCollector,
|
||||
}
|
||||
|
||||
impl<W: Write> WriteContext<W> {
|
||||
/// Create a new write context.
|
||||
pub fn new(
|
||||
writer: W,
|
||||
current_state: Value,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
finish_strategy: FinishStrategy,
|
||||
) -> Self {
|
||||
Self {
|
||||
writer,
|
||||
current_state,
|
||||
observation_count,
|
||||
snapshot_interval,
|
||||
finish_strategy,
|
||||
diagnostics: DiagnosticCollector::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a write context with existing diagnostics.
|
||||
pub fn with_diagnostics(
|
||||
writer: W,
|
||||
current_state: Value,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
finish_strategy: FinishStrategy,
|
||||
diagnostics: DiagnosticCollector,
|
||||
) -> Self {
|
||||
Self {
|
||||
writer,
|
||||
current_state,
|
||||
observation_count,
|
||||
snapshot_interval,
|
||||
finish_strategy,
|
||||
diagnostics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Write observations for a list of JSON files.
|
||||
///
|
||||
/// For each file:
|
||||
/// 1. Reads and parses the JSON
|
||||
/// 2. Diffs against current state
|
||||
/// 3. Writes observation events
|
||||
/// 4. Optionally writes a snapshot if interval is reached
|
||||
/// 5. Updates current state
|
||||
///
|
||||
/// Returns the number of observations written.
|
||||
pub fn write_observations<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
files: &[P],
|
||||
) -> Result<usize, Vec<Diagnostic>> {
|
||||
let mut observations_written = 0;
|
||||
|
||||
for file_path in files.iter() {
|
||||
let file_path = file_path.as_ref();
|
||||
|
||||
// Write comment marking which file we're processing
|
||||
if let Err(e) = writeln!(self.writer, "# Processing file: {}", file_path.display()) {
|
||||
return Err(vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]);
|
||||
}
|
||||
|
||||
// Get file modification time for the observation timestamp
|
||||
let file_mtime = get_file_mtime(file_path)?;
|
||||
|
||||
// Read and parse new state
|
||||
let content = std::fs::read_to_string(file_path).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the input file '{}': {}", file_path.display(), e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
let new_state: Value = serde_json::from_str(&content).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse '{}' as JSON: {}", file_path.display(), e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())]
|
||||
})?;
|
||||
|
||||
// Generate diff and create observation
|
||||
let observation_id = format!("obs-{}", Uuid::new_v4());
|
||||
let diff_events = diff::diff(&self.current_state, &new_state, "", &observation_id);
|
||||
|
||||
// Skip if no changes
|
||||
if diff_events.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Create and write observation
|
||||
let mut observation = Observation::new(observation_id, file_mtime);
|
||||
for event in diff_events {
|
||||
observation.add_event(event);
|
||||
}
|
||||
|
||||
self.write_observation(observation)?;
|
||||
observations_written += 1;
|
||||
self.observation_count += 1;
|
||||
|
||||
// Check if we should write a snapshot
|
||||
if self.should_write_snapshot() {
|
||||
self.write_snapshot(&new_state, file_mtime)?;
|
||||
}
|
||||
|
||||
// Update current state for next iteration
|
||||
self.current_state = new_state;
|
||||
}
|
||||
|
||||
Ok(observations_written)
|
||||
}
|
||||
|
||||
/// Write a single observation's events to the output.
|
||||
fn write_observation(&mut self, observation: Observation) -> Result<(), Vec<Diagnostic>> {
|
||||
for event in observation.to_events() {
|
||||
let event_json = serde_json::to_string(&event).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize an event to JSON: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
writeln!(self.writer, "{}", event_json).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check if we should write a snapshot based on observation count.
|
||||
fn should_write_snapshot(&self) -> bool {
|
||||
if let Some(interval) = self.snapshot_interval {
|
||||
self.observation_count > 0 && self.observation_count % interval == 0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Write a snapshot event.
|
||||
fn write_snapshot(&mut self, state: &Value, timestamp: DateTime<Utc>) -> Result<(), Vec<Diagnostic>> {
|
||||
let snapshot_id = format!("snapshot-{}", Uuid::new_v4());
|
||||
let snapshot = Event::Snapshot {
|
||||
observation_id: snapshot_id,
|
||||
timestamp,
|
||||
object: state.clone(),
|
||||
};
|
||||
|
||||
let snapshot_json = serde_json::to_string(&snapshot).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the snapshot to JSON: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
writeln!(self.writer, "{}", snapshot_json).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Finish the write operation.
|
||||
///
|
||||
/// This flushes the writer and, for compressed append operations,
|
||||
/// performs the atomic file replacement.
|
||||
pub fn finish(mut self) -> Result<DiagnosticCollector, Vec<Diagnostic>> {
|
||||
// Flush the writer
|
||||
self.writer.flush().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush the output file: {}", e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
// Handle atomic replacement if needed
|
||||
match self.finish_strategy {
|
||||
FinishStrategy::FlushOnly => {
|
||||
// Nothing more to do
|
||||
}
|
||||
FinishStrategy::AtomicReplace { temp_path, output_path } => {
|
||||
atomic_replace_file(&output_path, &temp_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self.diagnostics)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the file modification time as a DateTime<Utc>.
|
||||
fn get_file_mtime<P: AsRef<Path>>(path: P) -> Result<DateTime<Utc>, Vec<Diagnostic>> {
|
||||
let path = path.as_ref();
|
||||
let metadata = std::fs::metadata(path).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't get metadata for '{}': {}", path.display(), e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
let modified = metadata.modified().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't get modification time for '{}': {}", path.display(), e),
|
||||
)]
|
||||
})?;
|
||||
|
||||
Ok(modified.into())
|
||||
}
|
||||
|
||||
/// Encoder wrapper that provides a uniform interface for different compression formats.
|
||||
///
|
||||
/// This enum wraps the various compression encoders so we can treat them uniformly
|
||||
/// in the append-to-compressed-archive flow.
|
||||
#[cfg(feature = "compression")]
|
||||
pub enum CompressedWriter {
|
||||
Gzip(flate2::write::GzEncoder<std::fs::File>),
|
||||
Zlib(flate2::write::ZlibEncoder<std::fs::File>),
|
||||
Zstd(zstd::stream::write::Encoder<'static, std::fs::File>),
|
||||
Brotli(brotli::CompressorWriter<std::fs::File>),
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
impl Write for CompressedWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
match self {
|
||||
CompressedWriter::Gzip(w) => w.write(buf),
|
||||
CompressedWriter::Zlib(w) => w.write(buf),
|
||||
CompressedWriter::Zstd(w) => w.write(buf),
|
||||
CompressedWriter::Brotli(w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
match self {
|
||||
CompressedWriter::Gzip(w) => w.flush(),
|
||||
CompressedWriter::Zlib(w) => w.flush(),
|
||||
CompressedWriter::Zstd(w) => w.flush(),
|
||||
CompressedWriter::Brotli(w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
impl CompressedWriter {
|
||||
/// Create a new compressed writer for the given format and file.
|
||||
pub fn new(format: CompressionFormat, file: std::fs::File) -> Result<Self, Diagnostic> {
|
||||
use flate2::Compression;
|
||||
|
||||
match format {
|
||||
CompressionFormat::Gzip => {
|
||||
Ok(CompressedWriter::Gzip(flate2::write::GzEncoder::new(file, Compression::default())))
|
||||
}
|
||||
CompressionFormat::Zlib => {
|
||||
Ok(CompressedWriter::Zlib(flate2::write::ZlibEncoder::new(file, Compression::default())))
|
||||
}
|
||||
CompressionFormat::Zstd => {
|
||||
let encoder = zstd::stream::write::Encoder::new(file, 0).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create zstd encoder: {}", e),
|
||||
)
|
||||
})?;
|
||||
Ok(CompressedWriter::Zstd(encoder))
|
||||
}
|
||||
CompressionFormat::Brotli => {
|
||||
Ok(CompressedWriter::Brotli(brotli::CompressorWriter::new(file, 4096, 11, 22)))
|
||||
}
|
||||
CompressionFormat::Deflate => {
|
||||
// Deflate is typically used within gzip/zlib, not standalone for files
|
||||
Err(Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
"Standalone deflate compression is not supported for writing.".to_string(),
|
||||
))
|
||||
}
|
||||
CompressionFormat::None => {
|
||||
Err(Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
"CompressedWriter::new called with CompressionFormat::None".to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish compression and return any errors.
|
||||
///
|
||||
/// This must be called before the file is closed to ensure all
|
||||
/// compressed data is flushed.
|
||||
pub fn finish(self) -> Result<(), Diagnostic> {
|
||||
match self {
|
||||
CompressedWriter::Gzip(w) => {
|
||||
w.finish().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't finish gzip compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
CompressedWriter::Zlib(w) => {
|
||||
w.finish().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't finish zlib compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
CompressedWriter::Zstd(w) => {
|
||||
w.finish().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't finish zstd compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
CompressedWriter::Brotli(mut w) => {
|
||||
// Brotli doesn't have a finish() method, flush is sufficient
|
||||
w.flush().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush brotli compression: {}", e),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A write context specifically for compressed output.
|
||||
///
|
||||
/// This wraps WriteContext to handle the finish() call properly for
|
||||
/// compressed writers, which need to call finish() on the encoder
|
||||
/// before the atomic file swap.
|
||||
#[cfg(feature = "compression")]
|
||||
pub struct CompressedWriteContext {
|
||||
/// The inner write context.
|
||||
inner: WriteContext<CompressedWriter>,
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
impl CompressedWriteContext {
|
||||
/// Create a new compressed write context.
|
||||
pub fn new(
|
||||
writer: CompressedWriter,
|
||||
current_state: Value,
|
||||
observation_count: usize,
|
||||
snapshot_interval: Option<usize>,
|
||||
finish_strategy: FinishStrategy,
|
||||
diagnostics: DiagnosticCollector,
|
||||
) -> Self {
|
||||
Self {
|
||||
inner: WriteContext::with_diagnostics(
|
||||
writer,
|
||||
current_state,
|
||||
observation_count,
|
||||
snapshot_interval,
|
||||
finish_strategy,
|
||||
diagnostics,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Write observations for a list of JSON files.
|
||||
pub fn write_observations<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
files: &[P],
|
||||
) -> Result<usize, Vec<Diagnostic>> {
|
||||
self.inner.write_observations(files)
|
||||
}
|
||||
|
||||
/// Write raw bytes to the output (used for copying existing archive content).
|
||||
pub fn write_raw(&mut self, bytes: &[u8]) -> Result<(), Vec<Diagnostic>> {
|
||||
self.inner.writer.write_all(bytes).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output: {}", e),
|
||||
)]
|
||||
})
|
||||
}
|
||||
|
||||
/// Finish the write operation.
|
||||
///
|
||||
/// This finishes the compression encoder, then performs any atomic
|
||||
/// file operations needed.
|
||||
pub fn finish(self) -> Result<DiagnosticCollector, Vec<Diagnostic>> {
|
||||
let finish_strategy = self.inner.finish_strategy.clone();
|
||||
let diagnostics = self.inner.diagnostics;
|
||||
|
||||
// Finish compression first
|
||||
self.inner.writer.finish().map_err(|d| vec![d])?;
|
||||
|
||||
// Then handle atomic replacement if needed
|
||||
match finish_strategy {
|
||||
FinishStrategy::FlushOnly => {
|
||||
// Nothing more to do
|
||||
}
|
||||
FinishStrategy::AtomicReplace { temp_path, output_path } => {
|
||||
atomic_replace_file(&output_path, &temp_path)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(diagnostics)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
fn test_write_context_single_observation() {
|
||||
let mut output = Vec::new();
|
||||
let initial_state = json!({"count": 0});
|
||||
|
||||
{
|
||||
let mut ctx = WriteContext::new(
|
||||
&mut output,
|
||||
initial_state,
|
||||
0,
|
||||
None,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
|
||||
// Create a temp file with new state
|
||||
let mut temp_file = tempfile::NamedTempFile::new().unwrap();
|
||||
std::io::Write::write_all(&mut temp_file, br#"{"count": 1}"#).unwrap();
|
||||
temp_file.flush().unwrap();
|
||||
|
||||
let count = ctx.write_observations(&[temp_file.path()]).unwrap();
|
||||
assert_eq!(count, 1);
|
||||
}
|
||||
|
||||
let output_str = String::from_utf8(output).unwrap();
|
||||
assert!(output_str.contains("# Processing file:"));
|
||||
assert!(output_str.contains("observe"));
|
||||
assert!(output_str.contains("change"));
|
||||
assert!(output_str.contains("/count"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_write_context_no_changes() {
|
||||
let mut output = Vec::new();
|
||||
let initial_state = json!({"count": 0});
|
||||
|
||||
{
|
||||
let mut ctx = WriteContext::new(
|
||||
&mut output,
|
||||
initial_state,
|
||||
0,
|
||||
None,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
|
||||
// Create a temp file with same state
|
||||
let mut temp_file = tempfile::NamedTempFile::new().unwrap();
|
||||
std::io::Write::write_all(&mut temp_file, br#"{"count": 0}"#).unwrap();
|
||||
temp_file.flush().unwrap();
|
||||
|
||||
let count = ctx.write_observations(&[temp_file.path()]).unwrap();
|
||||
assert_eq!(count, 0);
|
||||
}
|
||||
|
||||
let output_str = String::from_utf8(output).unwrap();
|
||||
// Should have comment but no events
|
||||
assert!(output_str.contains("# Processing file:"));
|
||||
assert!(!output_str.contains("observe"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_write_snapshot() {
|
||||
let output: Vec<u8> = Vec::new();
|
||||
|
||||
// No interval set
|
||||
let ctx: WriteContext<Vec<u8>> = WriteContext::new(
|
||||
output.clone(),
|
||||
json!({}),
|
||||
5,
|
||||
None,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
assert!(!ctx.should_write_snapshot());
|
||||
|
||||
// Interval of 2, at observation 4 (multiple of 2)
|
||||
let ctx: WriteContext<Vec<u8>> = WriteContext::new(
|
||||
output.clone(),
|
||||
json!({}),
|
||||
4,
|
||||
Some(2),
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
assert!(ctx.should_write_snapshot());
|
||||
|
||||
// Interval of 2, at observation 3 (not multiple of 2)
|
||||
let ctx: WriteContext<Vec<u8>> = WriteContext::new(
|
||||
output,
|
||||
json!({}),
|
||||
3,
|
||||
Some(2),
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
assert!(!ctx.should_write_snapshot());
|
||||
}
|
||||
}
|
||||
|
|
@@ -135,10 +135,17 @@ pub fn open_archive<P: AsRef<Path>>(path: P) -> Result<OpenedArchive, Diagnostic
 ///
 /// Returns a diagnostic error if compression was detected but the binary
 /// was built without compression support.
+///
+/// # Arguments
+///
+/// * `format` - The detected compression format
+/// * `filename` - Path to the file (for error messages)
+/// * `action` - Description of what we're trying to do, e.g. "read" or "write"
+#[cfg_attr(feature = "compression", allow(unused_variables))]
 pub fn check_compression_support(
     format: CompressionFormat,
-    filename: &str,
+    filename: &Path,
+    action: &'static str,
 ) -> Result<(), Diagnostic> {
     #[cfg(not(feature = "compression"))]
     if format != CompressionFormat::None {
@@ -154,11 +161,12 @@ pub fn check_compression_support(
         return Err(Diagnostic::fatal(
             DiagnosticCode::UnsupportedVersion,
             format!(
-                "I detected a {}-compressed archive, but this build doesn't support compression.",
-                format_name
+                "I inferred that you wanted to {} a {}-compressed archive at:\n\n {}\n\n\
+                 However, this build does not include compression libraries.",
+                action, format_name, filename.display()
             ),
         )
-        .with_location(filename.to_string(), 1)
+        .with_location(filename.display().to_string(), 1)
         .with_advice(
             "This binary was built without compression support to reduce binary size and dependencies.\n\
              You have two options:\n\
@@ -175,7 +183,9 @@ pub fn check_compression_support(
 ///
 /// This opens the file, reads magic bytes, and returns the compression format.
 /// Useful when you need to know the format before deciding how to process the file.
-pub fn detect_archive_compression<P: AsRef<Path>>(path: P) -> Result<CompressionFormat, Diagnostic> {
+pub fn detect_archive_compression<P: AsRef<Path>>(
+    path: P,
+) -> Result<CompressionFormat, Diagnostic> {
     let path = path.as_ref();
     let filename = path.display().to_string();

@@ -208,7 +218,11 @@ mod tests {
     #[test]
     fn test_open_uncompressed_archive() {
         let mut temp_file = NamedTempFile::new().unwrap();
-        writeln!(temp_file, r#"{{"type":"@peoplesgrocers/json-archive","version":1}}"#).unwrap();
+        writeln!(
+            temp_file,
+            r#"{{"type":"@peoplesgrocers/json-archive","version":1}}"#
+        )
+        .unwrap();
         temp_file.flush().unwrap();

         let opened = open_archive(temp_file.path()).unwrap();
@ -1,644 +0,0 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! High-level archive operations: create and append.
|
||||
//!
|
||||
//! This module provides the top-level entry points for creating and appending
|
||||
//! to archives. These functions handle all the setup (opening files, detecting
|
||||
//! compression, reading existing state) and then delegate to the shared
|
||||
//! `WriteContext` for the actual observation writing.
|
||||
//!
|
||||
//! ## Architecture
|
||||
//!
|
||||
//! ```text
|
||||
//! ┌─────────────────┐
|
||||
//! │ archive_ops.rs │
|
||||
//! │ (this module) │
|
||||
//! └────────┬────────┘
|
||||
//! │
|
||||
//! ┌─────────────────┼─────────────────┐
|
||||
//! │ │ │
|
||||
//! ▼ ▼ ▼
|
||||
//! ┌───────────────┐ ┌───────────────┐ ┌───────────────┐
|
||||
//! │ archive_open │ │archive_context│ │ archive_reader│
|
||||
//! │ (compression) │ │ (WriteContext)│ │ (parsing) │
|
||||
//! └───────────────┘ └───────────────┘ └───────────────┘
|
||||
//! ```
|
||||
//!
|
||||
//! ## Operations
|
||||
//!
|
||||
//! - `create_archive`: Create a new archive from one or more JSON files
|
||||
//! - `append_to_archive`: Add observations to an existing archive
|
||||
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufWriter, Read, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::archive_context::{FinishStrategy, WriteContext};
|
||||
use crate::archive_open::{check_compression_support, detect_archive_compression, open_archive};
|
||||
use crate::archive_reader::{ArchiveReader, ReadMode};
|
||||
use crate::atomic_file::generate_temp_filename;
|
||||
use crate::detection::CompressionFormat;
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode};
|
||||
use crate::events::Header;
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
use crate::archive_context::{CompressedWriteContext, CompressedWriter};
|
||||
|
||||
/// Create a new archive from a list of JSON files.
|
||||
///
|
||||
/// The first file becomes the initial state in the header. Each subsequent
|
||||
/// file generates an observation with the diff from the previous state.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input_files` - List of JSON files to process (at least one required)
|
||||
/// * `output_path` - Path for the new archive file
|
||||
/// * `source` - Optional source identifier for the header
|
||||
/// * `snapshot_interval` - Optional interval for writing snapshots
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns an empty Vec on success, or a Vec of diagnostics on error.
|
||||
pub fn create_archive<P: AsRef<Path>>(
|
||||
input_files: &[P],
|
||||
output_path: P,
|
||||
source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
if input_files.is_empty() {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I need at least one input file to create an archive.".to_string(),
|
||||
)];
|
||||
}
|
||||
|
||||
// Read and parse the first file to get initial state
|
||||
let first_path = input_files[0].as_ref();
|
||||
let first_content = match std::fs::read_to_string(first_path) {
|
||||
Ok(content) => content,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the first input file '{}': {}", first_path.display(), e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let initial_state: Value = match serde_json::from_str(&first_content) {
|
||||
Ok(state) => state,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't parse '{}' as JSON: {}", first_path.display(), e),
|
||||
)
|
||||
.with_advice("Make sure the file contains valid JSON.".to_string())];
|
||||
}
|
||||
};
|
||||
|
||||
// Create the output file
|
||||
let output_path = output_path.as_ref();
|
||||
let file = match File::create(output_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create the output file '{}': {}", output_path.display(), e),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure you have write permission in this directory and that the path is valid."
|
||||
.to_string(),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let mut writer = BufWriter::new(file);
|
||||
|
||||
// Write the header
|
||||
let header = Header::new(initial_state.clone(), source);
|
||||
let header_json = match serde_json::to_string(&header) {
|
||||
Ok(json) => json,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the header to JSON: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = writeln!(writer, "{}", header_json) {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't write to the output file: {}", e),
|
||||
)];
|
||||
}
|
||||
|
||||
// If there are more files, process them through WriteContext
|
||||
if input_files.len() > 1 {
|
||||
let mut ctx = WriteContext::new(
|
||||
writer,
|
||||
initial_state,
|
||||
0,
|
||||
snapshot_interval,
|
||||
FinishStrategy::FlushOnly,
|
||||
);
|
||||
|
||||
// Process remaining files (skip the first one which is now the initial state)
|
||||
let remaining_files: Vec<&Path> = input_files[1..].iter().map(|p| p.as_ref()).collect();
|
||||
if let Err(diagnostics) = ctx.write_observations(&remaining_files) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
if let Err(diagnostics) = ctx.finish() {
|
||||
return diagnostics;
|
||||
}
|
||||
} else {
|
||||
// Just flush the header
|
||||
if let Err(e) = writer.flush() {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush the output file: {}", e),
|
||||
)];
|
||||
}
|
||||
}
|
||||
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
/// Append observations to an existing archive.
|
||||
///
|
||||
/// This function handles both compressed and uncompressed archives:
|
||||
/// - Uncompressed: Opens in append mode and writes new observations directly
|
||||
/// - Compressed: Reads entire archive, writes to temp file, atomic swap
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `archive_path` - Path to the existing archive
|
||||
/// * `new_files` - List of JSON files to add as observations
|
||||
/// * `output_path` - Where to write the result (can be same as archive_path)
|
||||
/// * `source` - Optional source identifier (not currently used for append)
|
||||
/// * `snapshot_interval` - Optional interval for writing snapshots
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns an empty Vec on success, or a Vec of diagnostics on error.
|
||||
pub fn append_to_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
_source: Option<String>,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let archive_path = archive_path.as_ref();
|
||||
let output_path = output_path.as_ref();
|
||||
|
||||
// Detect compression format
|
||||
let format = match detect_archive_compression(archive_path) {
|
||||
Ok(f) => f,
|
||||
Err(diag) => return vec![diag],
|
||||
};
|
||||
|
||||
// Check if this build supports the detected compression
|
||||
if let Err(diag) = check_compression_support(format, &archive_path.display().to_string()) {
|
||||
return vec![diag];
|
||||
}
|
||||
|
||||
if format == CompressionFormat::None {
|
||||
append_to_uncompressed_archive(archive_path, new_files, output_path, snapshot_interval)
|
||||
} else {
|
||||
append_to_compressed_archive(archive_path, new_files, output_path, format, snapshot_interval)
|
||||
}
|
||||
}
|
||||
|
||||
/// Append to an uncompressed archive.
|
||||
///
|
||||
/// This reads the archive to get the final state, then opens the file
|
||||
/// in append mode to add new observations.
|
||||
fn append_to_uncompressed_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let archive_path = archive_path.as_ref();
|
||||
let output_path = output_path.as_ref();
|
||||
|
||||
// Read the existing archive to get final state
|
||||
let reader = match ArchiveReader::new(archive_path, ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive for reading: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let read_result = match reader.read(archive_path) {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the archive: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Check for fatal diagnostics in the archive
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
let mut diagnostics = vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
"The existing archive contains fatal errors. Cannot append to a corrupt archive."
|
||||
.to_string(),
|
||||
)];
|
||||
diagnostics.extend(read_result.diagnostics.into_diagnostics());
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// If output path is different from archive path, copy the archive first
|
||||
if archive_path != output_path {
|
||||
if let Err(e) = std::fs::copy(archive_path, output_path) {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't copy the archive to the output location: {}", e),
|
||||
)];
|
||||
}
|
||||
}
|
||||
|
||||
// Open file in append mode
|
||||
let file = match OpenOptions::new().append(true).open(output_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file for appending: {}", e),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure the archive file exists and you have write permission.".to_string(),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Create write context and process files
|
||||
let mut ctx = WriteContext::with_diagnostics(
|
||||
file,
|
||||
read_result.final_state,
|
||||
read_result.observation_count,
|
||||
snapshot_interval,
|
||||
FinishStrategy::FlushOnly,
|
||||
read_result.diagnostics,
|
||||
);
|
||||
|
||||
let file_refs: Vec<&Path> = new_files.iter().map(|p| p.as_ref()).collect();
|
||||
if let Err(diagnostics) = ctx.write_observations(&file_refs) {
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
match ctx.finish() {
|
||||
Ok(collector) => collector.into_diagnostics(),
|
||||
Err(diagnostics) => diagnostics,
|
||||
}
|
||||
}
|
||||
|
||||
/// Append to a compressed archive.
|
||||
///
|
||||
/// This reads the entire archive (decompressing), writes everything to a
|
||||
/// new compressed temp file with the new observations, then atomically
|
||||
/// swaps the temp file with the original.
|
||||
#[cfg(feature = "compression")]
|
||||
fn append_to_compressed_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
new_files: &[Q],
|
||||
output_path: P,
|
||||
format: CompressionFormat,
|
||||
snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let archive_path = archive_path.as_ref();
|
||||
let output_path = output_path.as_ref();
|
||||
|
||||
// Step 1: Open and decompress the archive, reading all bytes
|
||||
let opened = match open_archive(archive_path) {
|
||||
Ok(o) => o,
|
||||
Err(diag) => return vec![diag],
|
||||
};
|
||||
|
||||
// Read all decompressed bytes into memory
|
||||
let mut decompressed_bytes = Vec::new();
|
||||
let mut reader = opened.reader;
|
||||
if let Err(e) = reader.read_to_end(&mut decompressed_bytes) {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the compressed archive: {}", e),
|
||||
)];
|
||||
}
|
||||
|
||||
// Step 2: Parse the archive to get final state using AppendSeek mode
|
||||
// We need to re-read from the decompressed bytes
|
||||
let archive_reader = match ArchiveReader::new(archive_path, ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create archive reader: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
let read_result = match archive_reader.read(archive_path) {
|
||||
Ok(result) => result,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't parse the archive: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Check for fatal diagnostics
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
let mut diagnostics = vec![Diagnostic::fatal(
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
"The existing archive contains fatal errors. Cannot append to a corrupt archive."
|
||||
.to_string(),
|
||||
)];
|
||||
diagnostics.extend(read_result.diagnostics.into_diagnostics());
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// Step 3: Create temp file with same compression format
|
||||
let temp_path = generate_temp_filename(output_path);
|
||||
let temp_file = match File::create(&temp_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't create temp file: {}", e),
|
||||
)];
|
||||
}
|
||||
};
|
||||
|
||||
// Create compressed writer
|
||||
let compressed_writer = match CompressedWriter::new(format, temp_file) {
|
||||
Ok(w) => w,
|
||||
Err(diag) => {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
return vec![diag];
|
||||
}
|
||||
};
|
||||
|
||||
// Step 4: Create write context and copy old data + write new observations
|
||||
let mut ctx = CompressedWriteContext::new(
|
||||
compressed_writer,
|
||||
read_result.final_state,
|
||||
read_result.observation_count,
|
||||
snapshot_interval,
|
||||
FinishStrategy::AtomicReplace {
|
||||
temp_path: temp_path.clone(),
|
||||
output_path: output_path.to_path_buf(),
|
||||
},
|
||||
read_result.diagnostics,
|
||||
);
|
||||
|
||||
// Write all old decompressed bytes first
|
||||
if let Err(diagnostics) = ctx.write_raw(&decompressed_bytes) {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// Write new observations
|
||||
let file_refs: Vec<&Path> = new_files.iter().map(|p| p.as_ref()).collect();
|
||||
if let Err(diagnostics) = ctx.write_observations(&file_refs) {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
return diagnostics;
|
||||
}
|
||||
|
||||
// Finish (this handles compression finalization and atomic swap)
|
||||
match ctx.finish() {
|
||||
Ok(collector) => collector.into_diagnostics(),
|
||||
Err(diagnostics) => {
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
diagnostics
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Stub for when compression feature is not enabled.
|
||||
#[cfg(not(feature = "compression"))]
|
||||
fn append_to_compressed_archive<P: AsRef<Path>, Q: AsRef<Path>>(
|
||||
archive_path: P,
|
||||
_new_files: &[Q],
|
||||
_output_path: P,
|
||||
format: CompressionFormat,
|
||||
_snapshot_interval: Option<usize>,
|
||||
) -> Vec<Diagnostic> {
|
||||
let format_name = match format {
|
||||
CompressionFormat::Gzip => "gzip",
|
||||
CompressionFormat::Deflate => "deflate",
|
||||
CompressionFormat::Zlib => "zlib",
|
||||
CompressionFormat::Brotli => "brotli",
|
||||
CompressionFormat::Zstd => "zstd",
|
||||
CompressionFormat::None => unreachable!(),
|
||||
};
|
||||
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
format!(
|
||||
"I detected a {}-compressed archive, but this build doesn't support compression.",
|
||||
format_name
|
||||
),
|
||||
)
|
||||
.with_location(archive_path.as_ref().display().to_string(), 1)
|
||||
.with_advice(
|
||||
"This binary was built without compression support.\n\
|
||||
Install with compression: cargo install json-archive --features compression\n\
|
||||
Or decompress the file first."
|
||||
.to_string(),
|
||||
)]
|
||||
}
|
||||
|
||||
/// Generate default output filename from input filename.
|
||||
///
|
||||
/// - `test.json` -> `test.json.archive`
|
||||
/// - `test.txt` -> `test.txt.json.archive`
|
||||
/// - `test` -> `test.json.archive`
|
||||
/// - `test.json.archive` -> `test.json.archive` (unchanged)
|
||||
pub fn default_output_filename<P: AsRef<Path>>(input_path: P) -> PathBuf {
|
||||
let path = input_path.as_ref();
|
||||
let mut output = path.to_path_buf();
|
||||
|
||||
// If it already ends with .json.archive, don't modify it
|
||||
if let Some(filename) = path.file_name() {
|
||||
if let Some(filename_str) = filename.to_str() {
|
||||
if filename_str.ends_with(".json.archive") {
|
||||
return output;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add .json.archive extension
|
||||
if let Some(extension) = path.extension() {
|
||||
if extension == "json" {
|
||||
// Replace .json with .json.archive
|
||||
output.set_extension("json.archive");
|
||||
} else {
|
||||
// Append .json.archive to whatever extension exists
|
||||
let new_extension = format!("{}.json.archive", extension.to_string_lossy());
|
||||
output.set_extension(new_extension);
|
||||
}
|
||||
} else {
|
||||
// No extension, just add .json.archive
|
||||
output.set_extension("json.archive");
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use serde_json::json;
|
||||
use std::io::Write as IoWrite;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_create_archive_single_file() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create input file
|
||||
let mut input_file = NamedTempFile::new()?;
|
||||
writeln!(input_file, r#"{{"count": 0, "name": "test"}}"#)?;
|
||||
input_file.flush()?;
|
||||
|
||||
// Create output file
|
||||
let output_file = NamedTempFile::new()?;
|
||||
|
||||
let diagnostics = create_archive(
|
||||
&[input_file.path()],
|
||||
output_file.path(),
|
||||
Some("test-source".to_string()),
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(diagnostics.is_empty(), "Expected no errors: {:?}", diagnostics);
|
||||
|
||||
// Verify the output
|
||||
let content = std::fs::read_to_string(output_file.path())?;
|
||||
let header: Header = serde_json::from_str(content.lines().next().unwrap())?;
|
||||
assert_eq!(header.file_type, "@peoplesgrocers/json-archive");
|
||||
assert_eq!(header.version, 1);
|
||||
assert_eq!(header.initial, json!({"count": 0, "name": "test"}));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_archive_multiple_files() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create input files
|
||||
let mut file1 = NamedTempFile::new()?;
|
||||
let mut file2 = NamedTempFile::new()?;
|
||||
writeln!(file1, r#"{{"count": 0}}"#)?;
|
||||
writeln!(file2, r#"{{"count": 1}}"#)?;
|
||||
file1.flush()?;
|
||||
file2.flush()?;
|
||||
|
||||
let output_file = NamedTempFile::new()?;
|
||||
|
||||
let diagnostics = create_archive(
|
||||
&[file1.path(), file2.path()],
|
||||
output_file.path(),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(diagnostics.is_empty(), "Expected no errors: {:?}", diagnostics);
|
||||
|
||||
// Verify output has header + observation events
|
||||
let content = std::fs::read_to_string(output_file.path())?;
|
||||
let lines: Vec<&str> = content.lines().collect();
|
||||
assert!(lines.len() >= 3); // header + comment + observe + change
|
||||
|
||||
// First line should be header
|
||||
let header: Header = serde_json::from_str(lines[0])?;
|
||||
assert_eq!(header.initial, json!({"count": 0}));
|
||||
|
||||
// Should contain observe and change events
|
||||
assert!(content.contains("observe"));
|
||||
assert!(content.contains("change"));
|
||||
assert!(content.contains("/count"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_append_to_uncompressed_archive() -> Result<(), Box<dyn std::error::Error>> {
|
||||
// Create initial archive
|
||||
let mut archive_file = NamedTempFile::new()?;
|
||||
let header = Header::new(json!({"count": 0}), None);
|
||||
writeln!(archive_file, "{}", serde_json::to_string(&header)?)?;
|
||||
archive_file.flush()?;
|
||||
|
||||
// Create file to append
|
||||
let mut new_file = NamedTempFile::new()?;
|
||||
writeln!(new_file, r#"{{"count": 1}}"#)?;
|
||||
new_file.flush()?;
|
||||
|
||||
let diagnostics = append_to_archive(
|
||||
archive_file.path(),
|
||||
&[new_file.path()],
|
||||
archive_file.path(),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
assert!(diagnostics.is_empty(), "Expected no errors: {:?}", diagnostics);
|
||||
|
||||
// Verify the archive was updated
|
||||
let content = std::fs::read_to_string(archive_file.path())?;
|
||||
assert!(content.contains("observe"));
|
||||
assert!(content.contains("change"));
|
||||
assert!(content.contains("/count"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_default_output_filename() {
|
||||
assert_eq!(
|
||||
default_output_filename("test.json"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test.txt"),
|
||||
PathBuf::from("test.txt.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
default_output_filename("test.json.archive"),
|
||||
PathBuf::from("test.json.archive")
|
||||
);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -110,7 +110,10 @@ pub fn generate_temp_filename<P: AsRef<Path>>(path: P) -> PathBuf {
 ///
 /// Returns diagnostics if any step of the operation fails. The function
 /// attempts automatic recovery by restoring the backup if the replacement fails.
-pub fn atomic_replace_file<P: AsRef<Path>>(original_path: P, temp_path: P) -> Result<(), Vec<Diagnostic>> {
+pub fn atomic_replace_file<P: AsRef<Path>>(
+    original_path: P,
+    temp_path: P,
+) -> Result<(), Vec<Diagnostic>> {
     let original = original_path.as_ref();
     let temp = temp_path.as_ref();

@@ -119,12 +122,13 @@ pub fn atomic_replace_file<P: AsRef<Path>>(original_path: P, temp_path: P) -> Re
     if let Some(filename_str) = filename.to_str() {
         // Extract random suffix from temp filename if it follows our pattern
         let temp_filename = temp.file_name().and_then(|f| f.to_str()).unwrap_or("");
-        let random_suffix = if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
-            // Extract suffix after the original filename
-            temp_filename.rsplit('.').next().unwrap_or("backup")
-        } else {
-            "backup"
-        };
+        let random_suffix =
+            if temp_filename.starts_with('.') && temp_filename.contains(filename_str) {
+                // Extract suffix after the original filename
+                temp_filename.rsplit('.').next().unwrap_or("backup")
+            } else {
+                "backup"
+            };

         let backup_filename = format!(".{}.{}.old", filename_str, random_suffix);
         if let Some(parent) = original.parent() {
@@ -148,7 +152,7 @@ pub fn atomic_replace_file<P: AsRef<Path>>(original_path: P, temp_path: P) -> Re
             )
             .with_advice(
                 "Make sure you have write permission in this directory and sufficient disk space."
-                    .to_string()
+                    .to_string(),
             )]);
         }

@@ -15,7 +15,8 @@ fn print_example(pointer_str: &str, value: &mut serde_json::Value) {
 }

 fn main() {
-    print!(r#"<!-- Generated by: cargo run --bin pointer_errors_demo > docs/diagnostics/json-pointer.md -->
+    print!(
+        r#"<!-- Generated by: cargo run --bin pointer_errors_demo > docs/diagnostics/json-pointer.md -->

 # JSON Pointer Diagnostics

@@ -52,7 +53,8 @@ or submit a pull request.

 Key doesn't exist in the object. Shows available keys and suggests typos.

-"#);
+"#
+    );

     print_example(
         "/user/emial",
@@ -65,13 +67,15 @@ Key doesn't exist in the object. Shows available keys and suggests typos.
         }),
     );

-    print!(r#"
+    print!(
+        r#"
 ## Type Mismatch

 Tried to index into a value that doesn't support it (e.g., `/domain` on a string,
 `/0` on a number). Shows the actual type.

-"#);
+"#
+    );

     print_example(
         "/users/0/email/domain",
@@ -82,12 +86,14 @@ Tried to index into a value that doesn't support it (e.g., `/domain` on a string
         }),
     );

-    print!(r#"
+    print!(
+        r#"
 ## Array Index Out of Bounds

 Index past the end of the array. Shows the array length.

-"#);
+"#
+    );

     print_example(
         "/items/5",
@@ -96,12 +102,14 @@ Index past the end of the array. Shows the array length.
         }),
     );

-    print!(r#"
+    print!(
+        r#"
 ## Array Index

 If you think you have an object but you're actually indexing into an array, you'll see this error.

-"#);
+"#
+    );

     print_example(
         "/items/foo",
@@ -110,13 +118,15 @@ If you think you have an object but you're actually indexing into an array, you'
         }),
     );

-    print!(r#"
+    print!(
+        r#"
 ## Deep Path Failures

 For long paths, the underline shows which segment failed. The full path remains
 visible so you can see what you were trying to reach.

-"#);
+"#
+    );

     print_example(
         "/data/users/0/profile/settings/theme",
@@ -21,7 +21,9 @@

 use crate::flags;
 use chrono::{DateTime, Utc};
-use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
+use json_archive::archive_open::open_archive;
+use json_archive::detection::CompressionFormat;
+use json_archive::{read_events, Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
 use serde::Serialize;
 use std::path::Path;

@@ -46,6 +48,7 @@ struct JsonObservation {
 #[derive(Serialize)]
 struct JsonInfoOutput {
     archive: String,
+    compression: String,
     created: String,
     file_size: u64,
     snapshot_count: usize,
@@ -54,9 +57,9 @@ struct JsonInfoOutput {
     efficiency_percent: f64,
 }

-pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
+pub fn run(flags: &flags::Info) -> Result<(), Vec<Diagnostic>> {
     if !flags.file.exists() {
-        return vec![Diagnostic::new(
+        return Err(vec![Diagnostic::new(
             DiagnosticLevel::Fatal,
             DiagnosticCode::PathNotFound,
             format!("I couldn't find the archive file: {}", flags.file.display()),
@@ -65,12 +68,13 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             "Make sure the file path is correct and the file exists. \
              Check for typos in the filename."
                 .to_string(),
-        )];
+        )]);
     }

-    let (observations, snapshot_count) = match collect_observations(&flags.file) {
-        Ok((obs, count)) => (obs, count),
-        Err(diagnostics) => return diagnostics,
+    let (observations, snapshot_count, compression_format) = match collect_observations(&flags.file)
+    {
+        Ok((obs, count, format)) => (obs, count, format),
+        Err(diagnostics) => return Err(diagnostics),
     };

     let file_size = match std::fs::metadata(&flags.file) {
@@ -79,7 +83,10 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     };

     // Calculate total JSON size (sum of all observations + newline separators)
-    let total_json_size: u64 = observations.iter().map(|obs| obs.json_size as u64).sum::<u64>()
+    let total_json_size: u64 = observations
+        .iter()
+        .map(|obs| obs.json_size as u64)
+        .sum::<u64>()
         + (observations.len() as u64).saturating_sub(1); // Add newlines between observations

     let efficiency_percent = if total_json_size > 0 {
@@ -96,6 +103,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     if observations.is_empty() {
         let empty_output = JsonInfoOutput {
             archive: flags.file.display().to_string(),
+            compression: compression_format.to_string(),
             created: "".to_string(),
             file_size,
             snapshot_count,
@@ -107,7 +115,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             "{}",
             serde_json::to_string_pretty(&empty_output).unwrap_or_default()
         );
-        return Vec::new();
+        return Ok(());
     }

     let json_observations: Vec<JsonObservation> = observations
@@ -128,6 +136,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {

     let json_output = JsonInfoOutput {
         archive: flags.file.display().to_string(),
+        compression: compression_format.to_string(),
         created: observations[0].created.to_rfc3339(),
         file_size,
         snapshot_count,
@@ -143,10 +152,11 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     } else {
         // Human-readable output mode
         println!("Archive: {}", flags.file.display());
+        println!("Compression: {}", compression_format);

         if observations.is_empty() {
             println!("No observations found");
-            return Vec::new();
+            return Ok(());
         }

         let first_timestamp = &observations[0].created;
@@ -217,56 +227,26 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             snapshot_text,
             comparison
||||
);
|
||||
println!(
|
||||
"Data size: {}",
|
||||
format_size(total_json_size)
|
||||
);
|
||||
println!("Data size: {}", format_size(total_json_size));
|
||||
|
||||
// Add usage instructions
|
||||
println!();
|
||||
println!("To get the JSON value at a specific observation:");
|
||||
println!(" json-archive state --index <#> {}", flags.file.display());
|
||||
println!(
|
||||
" json-archive state --id <observation-id> {}",
|
||||
flags.file.display()
|
||||
);
|
||||
println!();
|
||||
println!("Examples:");
|
||||
println!(
|
||||
" json-archive state --index 0 {} # Get initial state",
|
||||
flags.file.display()
|
||||
);
|
||||
println!(
|
||||
" json-archive state --index 2 {} # Get state after observation 2",
|
||||
flags.file.display()
|
||||
);
|
||||
println!(" json-archive state --index <#> <archive>");
|
||||
println!(" json-archive state --id <observation-id> <archive>");
|
||||
}
|
||||
|
||||
Vec::new()
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
|
||||
let reader = match json_archive::ArchiveReader::new(file_path, json_archive::ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file: {}", e),
|
||||
)]);
|
||||
}
|
||||
};
|
||||
fn collect_observations(
|
||||
file_path: &Path,
|
||||
) -> Result<(Vec<ObservationInfo>, usize, CompressionFormat), Vec<Diagnostic>> {
|
||||
let opened = open_archive(file_path)?;
|
||||
let compression_format = opened.format;
|
||||
|
||||
let (initial_state, mut event_iter) = match reader.events(file_path) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the archive file: {}", e),
|
||||
)]);
|
||||
}
|
||||
};
|
||||
let (initial_state, mut event_iter) =
|
||||
read_events(opened.reader, &file_path.display().to_string())?;
|
||||
|
||||
// Check for fatal diagnostics from initial parsing
|
||||
if event_iter.diagnostics.has_fatal() {
|
||||
|
|
@ -295,7 +275,11 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
|
|||
// Iterate through events
|
||||
while let Some(event) = event_iter.next() {
|
||||
match event {
|
||||
Event::Observe { observation_id, timestamp, change_count } => {
|
||||
Event::Observe {
|
||||
observation_id,
|
||||
timestamp,
|
||||
change_count,
|
||||
} => {
|
||||
observations.push(ObservationInfo {
|
||||
id: observation_id,
|
||||
timestamp,
|
||||
|
|
@ -316,7 +300,9 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
|
|||
}
|
||||
}
|
||||
}
|
||||
Event::Change { path, new_value, .. } => {
|
||||
Event::Change {
|
||||
path, new_value, ..
|
||||
} => {
|
||||
let _ = json_archive::apply_change(&mut current_state, &path, new_value);
|
||||
|
||||
// Update the JSON size of the last observation
|
||||
|
|
@ -368,10 +354,9 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
|
|||
}
|
||||
}
|
||||
|
||||
Ok((observations, snapshot_count))
|
||||
Ok((observations, snapshot_count, compression_format))
|
||||
}
|
||||
|
||||
|
||||
fn format_timestamp(dt: &DateTime<Utc>) -> String {
|
||||
dt.format("%a %H:%M:%S %d-%b-%Y").to_string()
|
||||
}
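// With the format string above, format_timestamp renders a timestamp like
// "Wed 14:03:07 05-Mar-2025" in the human-readable info output.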
@ -21,3 +21,4 @@
|
|||
|
||||
pub mod info;
|
||||
pub mod state;
|
||||
pub mod write;
@ -21,7 +21,11 @@
|
|||
|
||||
use crate::flags;
|
||||
use chrono::{DateTime, Utc};
|
||||
use json_archive::{apply_add, apply_change, apply_move, apply_remove, ArchiveReader, Diagnostic, DiagnosticCode, DiagnosticLevel, Event, ReadMode};
|
||||
use json_archive::archive_open::open_archive;
|
||||
use json_archive::{
|
||||
apply_add, apply_change, apply_move, apply_remove, read_events, Diagnostic, DiagnosticCode,
|
||||
DiagnosticLevel, Event,
|
||||
};
|
||||
use serde_json::Value;
|
||||
use std::path::Path;
|
||||
|
||||
|
|
@ -35,9 +39,9 @@ enum AccessMethod {
|
|||
Latest,
|
||||
}
|
||||
|
||||
pub fn run(flags: &flags::State) -> Vec<Diagnostic> {
|
||||
pub fn run(flags: &flags::State) -> Result<(), Vec<Diagnostic>> {
|
||||
if !flags.file.exists() {
|
||||
return vec![Diagnostic::new(
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the archive file: {}", flags.file.display()),
|
||||
|
|
@ -46,34 +50,34 @@ pub fn run(flags: &flags::State) -> Vec<Diagnostic> {
|
|||
"Make sure the file path is correct and the file exists. \
|
||||
Check for typos in the filename."
|
||||
.to_string(),
|
||||
)];
|
||||
)]);
|
||||
}
|
||||
|
||||
// Parse and validate flags - ensure only one access method is specified
|
||||
let access_method = match parse_access_method(flags) {
|
||||
Ok(method) => method,
|
||||
Err(diagnostic) => return vec![diagnostic],
|
||||
Err(diagnostic) => return Err(vec![diagnostic]),
|
||||
};
|
||||
|
||||
// Find and replay to the target observation
|
||||
let target_state = match find_and_replay_to_target(&flags.file, &access_method) {
|
||||
Ok(state) => state,
|
||||
Err(diagnostics) => return diagnostics,
|
||||
Err(diagnostics) => return Err(diagnostics),
|
||||
};
|
||||
|
||||
// Output the JSON state
|
||||
match serde_json::to_string_pretty(&target_state) {
|
||||
Ok(json) => println!("{}", json),
|
||||
Err(e) => {
|
||||
return vec![Diagnostic::new(
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::InvalidEventJson,
|
||||
format!("I couldn't serialize the state to JSON: {}", e),
|
||||
)];
|
||||
)]);
|
||||
}
|
||||
}
|
||||
|
||||
Vec::new()
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_access_method(flags: &flags::State) -> Result<AccessMethod, Diagnostic> {
|
||||
|
|
@ -151,27 +155,10 @@ fn find_and_replay_to_target(
|
|||
file_path: &Path,
|
||||
access_method: &AccessMethod,
|
||||
) -> Result<Value, Vec<Diagnostic>> {
|
||||
let reader = match ArchiveReader::new(file_path, ReadMode::AppendSeek) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file: {}", e),
|
||||
)]);
|
||||
}
|
||||
};
|
||||
let opened = open_archive(file_path)?;
|
||||
|
||||
let (initial_state, mut event_iter) = match reader.events(file_path) {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't read the archive file: {}", e),
|
||||
)]);
|
||||
}
|
||||
};
|
||||
let (initial_state, mut event_iter) =
|
||||
read_events(opened.reader, &file_path.display().to_string())?;
|
||||
|
||||
// Check for fatal diagnostics from initial parsing
|
||||
if event_iter.diagnostics.has_fatal() {
|
||||
|
|
@ -193,7 +180,11 @@ fn find_and_replay_to_target(
|
|||
// Process events and track state at each observation
|
||||
while let Some(event) = event_iter.next() {
|
||||
match event {
|
||||
Event::Observe { observation_id, timestamp, change_count: _ } => {
|
||||
Event::Observe {
|
||||
observation_id,
|
||||
timestamp,
|
||||
change_count: _,
|
||||
} => {
|
||||
observations.push(ObservationWithEvents {
|
||||
id: observation_id,
|
||||
timestamp,
|
||||
|
|
@ -210,7 +201,9 @@ fn find_and_replay_to_target(
|
|||
}
|
||||
}
|
||||
}
|
||||
Event::Change { path, new_value, .. } => {
|
||||
Event::Change {
|
||||
path, new_value, ..
|
||||
} => {
|
||||
let _ = apply_change(&mut current_state, &path, new_value);
|
||||
|
||||
// Update the final state of the last observation
|
||||
|
|
|
|||
316 src/cmd/write.rs Normal file
|
|
@ -0,0 +1,316 @@
|
|||
use crate::flags;
|
||||
use json_archive::archive_open::{check_compression_support, open_archive};
|
||||
use json_archive::archive_reader::{read_archive, ReadMode};
|
||||
use json_archive::archive_writer::{default_output_filename, write_observation};
|
||||
use json_archive::atomic_file::atomic_replace_file;
|
||||
use json_archive::compression_writer::CompressionWriter;
|
||||
use json_archive::detection::CompressionFormat;
|
||||
use json_archive::write_strategy::{determine_strategy, WriteStrategy};
|
||||
use json_archive::{is_json_archive, Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
|
||||
use serde_json::Value;
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub fn run(flags: &flags::Write) -> Result<(), Vec<Diagnostic>> {
|
||||
let (input_files, strategy) = parse_flags(flags)?;
|
||||
|
||||
assert!(!input_files.is_empty());
|
||||
|
||||
match strategy {
|
||||
WriteStrategy::Create {
|
||||
output: (dest, dest_fmt),
|
||||
} => {
|
||||
check_compression_support(dest_fmt, &dest, "write")?;
|
||||
|
||||
println!("Creating new archive: {}", dest.display());
|
||||
println!("Input files: {:?}", input_files);
|
||||
|
||||
// Create the writer - on error, no file cleanup needed since create failed
|
||||
let mut writer = CompressionWriter::create(&dest, dest_fmt)?;
|
||||
|
||||
let mut current_state = Value::Null;
|
||||
let mut observation_count: usize = 0;
|
||||
for file in input_files {
|
||||
// TODO: On write error, we need to clean up the partially written file ourselves
|
||||
current_state = write_observation(
|
||||
&mut writer,
|
||||
&mut observation_count,
|
||||
flags.snapshot_interval,
|
||||
¤t_state,
|
||||
&file,
|
||||
flags.source.clone(),
|
||||
)?;
|
||||
}
|
||||
|
||||
// Finalize compression and flush buffers.
|
||||
// Note: finish() does not clean up the file on error - caller must
|
||||
// remove the file themselves if this fails.
|
||||
if let Err(diagnostics) = writer.finish() {
|
||||
let _ = std::fs::remove_file(&dest);
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
println!("Archive created successfully: {}", dest.display());
|
||||
Ok(())
|
||||
}
|
||||
WriteStrategy::Append { path } => {
|
||||
let opened = open_archive(&path)?;
|
||||
let read_result = read_archive(
|
||||
opened.reader,
|
||||
&path.display().to_string(),
|
||||
ReadMode::AppendSeek,
|
||||
)?;
|
||||
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
return Err(read_result.diagnostics.into_diagnostics());
|
||||
}
|
||||
|
||||
let mut current_state = read_result.final_state;
|
||||
// observation_count starts at existing count + 1 (header counts as first observation)
|
||||
let mut observation_count = read_result.observation_count + 1;
|
||||
|
||||
// Note, we are reopening the same file for appending. So getting a new file descriptor
|
||||
let mut writer = BufWriter::new(open_for_appending(&path)?);
|
||||
|
||||
for filename in input_files {
|
||||
current_state = write_observation(
|
||||
&mut writer,
|
||||
&mut observation_count,
|
||||
flags.snapshot_interval,
|
||||
¤t_state,
|
||||
&filename,
|
||||
flags.source.clone(),
|
||||
)?;
|
||||
}
|
||||
|
||||
writer.flush().map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't flush the output: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
WriteStrategy::CopyOnWrite {
|
||||
input: (src, fmt_src),
|
||||
output: (dest, fmt_dest),
|
||||
} => {
|
||||
assert!(src != dest);
|
||||
check_compression_support(fmt_src, &src, "read")?;
|
||||
check_compression_support(fmt_dest, &dest, "write")?;
|
||||
|
||||
copy_and_append(
|
||||
&src,
|
||||
&dest,
|
||||
fmt_dest,
|
||||
&input_files,
|
||||
flags.snapshot_interval,
|
||||
flags.source.clone(),
|
||||
)
|
||||
}
|
||||
WriteStrategy::AtomicSwap {
|
||||
path,
|
||||
compression: format,
|
||||
temp_path,
|
||||
} => {
|
||||
assert!(path != temp_path);
|
||||
check_compression_support(format, &path, "read")?;
|
||||
|
||||
copy_and_append(
|
||||
&path,
|
||||
&temp_path,
|
||||
format,
|
||||
&input_files,
|
||||
flags.snapshot_interval,
|
||||
flags.source.clone(),
|
||||
)?;
|
||||
|
||||
atomic_replace_file(&path, &temp_path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn open_for_appending(path: &Path) -> Result<File, Vec<Diagnostic>> {
|
||||
let file: File = OpenOptions::new().append(true).open(&path).map_err(|e| {
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't open the archive file for appending: {}", e),
|
||||
)
|
||||
.with_advice("Make sure the archive file exists and you have write permission.".to_string())
|
||||
})?;
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
/// Copy an archive from source to destination, then append new observations.
|
||||
///
|
||||
/// This handles decompression of the source and compression of the destination
|
||||
/// transparently. On error, the destination file is removed.
|
||||
fn copy_and_append(
|
||||
src: &Path,
|
||||
dest: &Path,
|
||||
dest_fmt: CompressionFormat,
|
||||
input_files: &[PathBuf],
|
||||
snapshot_interval: Option<usize>,
|
||||
source: Option<String>,
|
||||
) -> Result<(), Vec<Diagnostic>> {
|
||||
assert!(src != dest);
|
||||
let opened = open_archive(src)?;
|
||||
let mut reader = opened.reader;
|
||||
|
||||
// Create destination writer (handles compression)
|
||||
let mut writer = CompressionWriter::create(dest, dest_fmt)?;
|
||||
|
||||
// Copy all decompressed bytes to the new (possibly compressed) destination
|
||||
std::io::copy(&mut reader, &mut writer).map_err(|e| {
|
||||
let _ = std::fs::remove_file(dest);
|
||||
Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't copy the archive contents: {}", e),
|
||||
)
|
||||
})?;
|
||||
|
||||
// Read the archive to get final state for appending
|
||||
let opened = open_archive(src)?;
|
||||
let read_result = read_archive(
|
||||
opened.reader,
|
||||
&src.display().to_string(),
|
||||
ReadMode::AppendSeek,
|
||||
)?;
|
||||
|
||||
if read_result.diagnostics.has_fatal() {
|
||||
let _ = std::fs::remove_file(dest);
|
||||
return Err(read_result.diagnostics.into_diagnostics());
|
||||
}
|
||||
|
||||
let mut current_state = read_result.final_state;
|
||||
let mut observation_count = read_result.observation_count + 1;
|
||||
|
||||
// Append new observations
|
||||
for filename in input_files {
|
||||
current_state = write_observation(
|
||||
&mut writer,
|
||||
&mut observation_count,
|
||||
snapshot_interval,
|
||||
¤t_state,
|
||||
filename,
|
||||
source.clone(),
|
||||
)?;
|
||||
}
|
||||
|
||||
// Finalize compression and flush buffers
|
||||
if let Err(diagnostics) = writer.finish() {
|
||||
let _ = std::fs::remove_file(dest);
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
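// A sketch of how the CopyOnWrite arm above drives this helper to transcode
// an archive from one compression format to another. The file names are
// hypothetical; in the real command they come from parse_flags(). Note that
// the source is opened twice, matching the copy-then-replay structure above.
#[allow(dead_code)]
fn transcode_example() -> Result<(), Vec<Diagnostic>> {
    copy_and_append(
        Path::new("metrics.json.archive.gz"),  // hypothetical gzip source
        Path::new("metrics.json.archive.zst"), // hypothetical zstd destination
        CompressionFormat::Zstd,
        &[PathBuf::from("metrics-2025-06-01.json")], // hypothetical new observation
        None, // snapshot_interval
        None, // source label
    )
}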
|
||||
|
||||
/// Parse the CLI arguments to determine the destination archive and input files.
|
||||
/// This consolidates all the inferring behavior in one place.
|
||||
fn parse_flags(flags: &flags::Write) -> Result<(Vec<PathBuf>, WriteStrategy), Vec<Diagnostic>> {
|
||||
let mut diagnostics = Vec::new();
|
||||
if flags.inputs.is_empty() {
|
||||
diagnostics.push(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I need at least one JSON file to create an archive, but you didn't provide any."
|
||||
.to_string(),
|
||||
)
|
||||
.with_advice(
|
||||
"Usage: json-archive <file1.json> [file2.json ...]\n\n\
|
||||
The first file will be used as the initial state, and subsequent files \
|
||||
will be compared to generate change events."
|
||||
.to_string(),
|
||||
),
|
||||
);
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
// I figured it would be a helpful bit of automation on behalf of the human
|
||||
// user for this tool to validate all input files exist
|
||||
for file in flags.inputs.iter() {
|
||||
if !file.exists() {
|
||||
diagnostics.push(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the input file: {}", file.display()),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure the file path is correct and the file exists. \
|
||||
Check for typos in the filename."
|
||||
.to_string(),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let source_archive: Option<PathBuf> = if Path::new(&flags.inputs[0]).exists()
|
||||
&& is_json_archive(&flags.inputs[0]).unwrap_or(false)
|
||||
{
|
||||
Some(flags.inputs[0].clone())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Determine the destination archive path
|
||||
let destination = if let Some(output) = &flags.output {
|
||||
// Explicitly specified output path
|
||||
output.clone()
|
||||
} else if source_archive.is_some() {
|
||||
source_archive.clone().unwrap()
|
||||
} else {
|
||||
// Infer from first input
|
||||
default_output_filename(&flags.inputs[0])
|
||||
};
|
||||
|
||||
// Filter out the destination from input files to avoid read-write conflicts
|
||||
let input_files: Vec<_> = flags
|
||||
.inputs
|
||||
.iter()
|
||||
.filter(|path| {
|
||||
match (
|
||||
std::fs::canonicalize(path).ok(),
|
||||
std::fs::canonicalize(&destination).ok(),
|
||||
) {
|
||||
(Some(p), Some(d)) => p != d,
|
||||
_ => true, // Include if canonicalization fails (file doesn't exist yet)
|
||||
}
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if input_files.is_empty() {
|
||||
diagnostics.push(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"No input files remain after filtering out the destination archive.".to_string()
|
||||
)
|
||||
.with_advice(
|
||||
"You specified the output path in the list of input files. This would cause a read-write conflict.\n\
|
||||
Either remove the output path from inputs, or use a different output path with -o."
|
||||
.to_string()
|
||||
)
|
||||
);
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
if !diagnostics.is_empty() {
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
Ok((
|
||||
input_files,
|
||||
determine_strategy(
|
||||
source_archive.as_deref(),
|
||||
&destination,
|
||||
CompressionFormat::None,
|
||||
),
|
||||
))
|
||||
}
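// A sketch of the decision determine_strategy makes, following the rules in
// the src/write_strategy.rs module docs and the variant shapes destructured
// in run() above. Illustrative only: format detection is elided and the
// temp-file name here is made up.
#[allow(dead_code)]
fn strategy_sketch(
    source: Option<&Path>,
    dest: &Path,
    dest_fmt: CompressionFormat,
    existing_fmt: CompressionFormat, // format detected on the existing archive
) -> WriteStrategy {
    match source {
        // No existing archive: start a fresh one in the requested format.
        None => WriteStrategy::Create {
            output: (dest.to_path_buf(), dest_fmt),
        },
        // Reading one path, writing another: copy and transcode; the source
        // is never modified.
        Some(src) if src != dest => WriteStrategy::CopyOnWrite {
            input: (src.to_path_buf(), existing_fmt),
            output: (dest.to_path_buf(), dest_fmt),
        },
        // Same path, uncompressed: seek to the end and append in place.
        Some(_) if existing_fmt == CompressionFormat::None => WriteStrategy::Append {
            path: dest.to_path_buf(),
        },
        // Same path, compressed: rewrite through a temp file, then rename.
        Some(_) => WriteStrategy::AtomicSwap {
            path: dest.to_path_buf(),
            compression: existing_fmt,
            temp_path: dest.with_extension("tmp"), // illustrative name only
        },
    }
}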
|
||||
431 src/compression_writer.rs Normal file
|
|
@ -0,0 +1,431 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Unified writer abstraction for compressed and uncompressed output.
//!
//! This module provides `CompressionWriter`, an enum that wraps different
//! compression encoders behind a common interface implementing `std::io::Write`.
//!
//! The goal is to simplify write logic by allowing callers to write to any
//! compression format using the same API, with proper error handling that
//! produces user-friendly diagnostics.
|
||||
|
||||
use std::fs::File;
|
||||
use std::io::{BufWriter, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::detection::CompressionFormat;
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode};
|
||||
|
||||
/// A writer that handles optional compression transparently.
///
/// Wraps different compression encoders behind a unified interface
/// that implements `Write` and provides a `finish()` method for cleanup.
///
/// # Example
///
/// ```ignore
/// use json_archive::compression_writer::CompressionWriter;
/// use json_archive::detection::CompressionFormat;
/// use std::io::Write;
///
/// let mut writer = CompressionWriter::create(path, CompressionFormat::Gzip)?;
/// writeln!(writer, "some data")?;
/// writer.finish()?;
/// ```
|
||||
// Note: Cannot derive Debug because compression encoder types don't implement Debug
|
||||
pub enum CompressionWriter {
|
||||
/// Uncompressed output - uses BufWriter since File has no internal buffering
|
||||
Plain(BufWriter<File>),
|
||||
/// Compression encoders write directly to File - they do their own internal buffering
|
||||
#[cfg(feature = "compression")]
|
||||
Gzip(flate2::write::GzEncoder<File>),
|
||||
#[cfg(feature = "compression")]
|
||||
Zlib(flate2::write::ZlibEncoder<File>),
|
||||
#[cfg(feature = "compression")]
|
||||
Zstd(zstd::stream::write::Encoder<'static, File>),
|
||||
#[cfg(feature = "compression")]
|
||||
Brotli(brotli::CompressorWriter<File>),
|
||||
}
|
||||
|
||||
impl CompressionWriter {
|
||||
/// Open a file for writing with the specified compression format.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns a diagnostic explaining:
|
||||
/// - What file we tried to create
|
||||
/// - What compression format was requested
|
||||
/// - Why it failed (permissions, disk full, unsupported format, etc.)
|
||||
pub fn create(path: &Path, format: CompressionFormat) -> Result<Self, Vec<Diagnostic>> {
|
||||
let file = File::create(path).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't create the output file '{}': {}",
|
||||
path.display(),
|
||||
describe_io_error(&e)
|
||||
),
|
||||
)
|
||||
.with_advice(advice_for_create_error(&e, path))]
|
||||
})?;
|
||||
|
||||
match format {
|
||||
// Plain needs BufWriter since File has no internal buffering
|
||||
CompressionFormat::None => Ok(Self::Plain(BufWriter::new(file))),
|
||||
|
||||
// Compression encoders do their own buffering, write directly to File
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionFormat::Gzip => {
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
Ok(Self::Gzip(GzEncoder::new(file, Compression::default())))
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionFormat::Zlib => {
|
||||
use flate2::write::ZlibEncoder;
|
||||
use flate2::Compression;
|
||||
Ok(Self::Zlib(ZlibEncoder::new(file, Compression::default())))
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionFormat::Deflate => {
|
||||
// Deflate is a raw compression algorithm, not a container format.
|
||||
// We can read deflate data, but when writing we need to pick a
|
||||
// container (gzip or zlib) that provides headers and checksums.
|
||||
Err(vec![Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
"I can't write raw deflate format because it's not a container format.".to_string(),
|
||||
)
|
||||
.with_advice(
|
||||
"Deflate is a compression algorithm, not a file format. When writing, \
|
||||
you need to choose a container format that wraps deflate data:\n\
|
||||
\n - Use .gz (gzip) for general-purpose compression\n \
|
||||
- Use .zlib for zlib-wrapped deflate\n\
|
||||
\nIf you're appending to an existing deflate file, consider converting \
|
||||
it to gzip first.".to_string()
|
||||
)])
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionFormat::Zstd => {
|
||||
let encoder = zstd::stream::write::Encoder::new(file, 0).map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't initialize zstd compression for '{}': {}",
|
||||
path.display(),
|
||||
e
|
||||
),
|
||||
)]
|
||||
})?;
|
||||
Ok(Self::Zstd(encoder))
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
CompressionFormat::Brotli => {
|
||||
// buffer_size=4096, quality=11 (max), lgwin=22 (default window)
|
||||
Ok(Self::Brotli(brotli::CompressorWriter::new(
|
||||
file, 4096, 11, 22,
|
||||
)))
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "compression"))]
|
||||
_ => Err(vec![Diagnostic::fatal(
|
||||
DiagnosticCode::UnsupportedVersion,
|
||||
format!(
|
||||
"I can't write {} compressed files because this build doesn't include compression support.",
|
||||
format_name(format)
|
||||
),
|
||||
)
|
||||
.with_advice("Rebuild with: cargo build --features compression".to_string())]),
|
||||
}
|
||||
}
|
||||
|
||||
/// Finish writing and flush all buffers.
///
/// For compressed formats, this finalizes the compression stream.
/// Must be called before dropping to ensure all data is written.
///
/// # Errors
///
/// Returns a diagnostic if flushing or finalizing fails.
///
/// **Important**: This method does not clean up the output file on error.
/// If `finish()` fails, the caller is responsible for removing the
/// partially-written file themselves:
///
/// ```ignore
/// if let Err(diagnostics) = writer.finish() {
///     let _ = std::fs::remove_file(&path);
///     return Err(diagnostics);
/// }
/// ```
|
||||
pub fn finish(self) -> Result<(), Vec<Diagnostic>> {
|
||||
match self {
|
||||
Self::Plain(mut w) => w.flush().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't flush the output file: {}",
|
||||
describe_io_error(&e)
|
||||
),
|
||||
)]
|
||||
}),
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Gzip(encoder) => {
|
||||
encoder.finish().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't finalize gzip compression: {}",
|
||||
describe_io_error(&e)
|
||||
),
|
||||
)]
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Zlib(encoder) => {
|
||||
encoder.finish().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't finalize zlib compression: {}",
|
||||
describe_io_error(&e)
|
||||
),
|
||||
)]
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Zstd(encoder) => {
|
||||
encoder.finish().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't finalize zstd compression: {}",
|
||||
describe_io_error(&e)
|
||||
),
|
||||
)]
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Brotli(mut encoder) => {
|
||||
// Brotli uses a different API - no finish() method
|
||||
// Flush the encoder (brotli auto-flushes on drop, but we flush explicitly)
|
||||
encoder.flush().map_err(|e| {
|
||||
vec![Diagnostic::fatal(
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!(
|
||||
"I couldn't finalize brotli compression: {}",
|
||||
describe_io_error(&e)
|
||||
),
|
||||
)]
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Write for CompressionWriter {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
match self {
|
||||
Self::Plain(w) => w.write(buf),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Gzip(w) => w.write(buf),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Zlib(w) => w.write(buf),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Zstd(w) => w.write(buf),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Brotli(w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
match self {
|
||||
Self::Plain(w) => w.flush(),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Gzip(w) => w.flush(),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Zlib(w) => w.flush(),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Zstd(w) => w.flush(),
|
||||
#[cfg(feature = "compression")]
|
||||
Self::Brotli(w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate io::Error into human-readable descriptions.
|
||||
fn describe_io_error(e: &std::io::Error) -> String {
|
||||
match e.kind() {
|
||||
std::io::ErrorKind::NotFound => "the directory doesn't exist".to_string(),
|
||||
std::io::ErrorKind::PermissionDenied => "permission denied".to_string(),
|
||||
std::io::ErrorKind::AlreadyExists => {
|
||||
"a directory with that name already exists".to_string()
|
||||
}
|
||||
std::io::ErrorKind::StorageFull => "the disk is full".to_string(),
|
||||
std::io::ErrorKind::ReadOnlyFilesystem => "the filesystem is read-only".to_string(),
|
||||
_ => e.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate helpful advice based on the error type.
|
||||
fn advice_for_create_error(e: &std::io::Error, path: &Path) -> String {
|
||||
match e.kind() {
|
||||
std::io::ErrorKind::NotFound => {
|
||||
if let Some(parent) = path.parent() {
|
||||
format!(
|
||||
"The parent directory '{}' doesn't exist. Create it first with:\n mkdir -p '{}'",
|
||||
parent.display(),
|
||||
parent.display()
|
||||
)
|
||||
} else {
|
||||
"Check that the path is valid.".to_string()
|
||||
}
|
||||
}
|
||||
std::io::ErrorKind::PermissionDenied => {
|
||||
format!(
|
||||
"You don't have write permission for this location. Try:\n ls -la '{}'",
|
||||
path.parent()
|
||||
.map(|p| p.display().to_string())
|
||||
.unwrap_or_else(|| ".".to_string())
|
||||
)
|
||||
}
|
||||
std::io::ErrorKind::StorageFull => {
|
||||
"Free up disk space or write to a different location.".to_string()
|
||||
}
|
||||
_ => "Check that the path is valid and you have write permission.".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a human-readable name for a compression format.
|
||||
#[cfg(not(feature = "compression"))]
|
||||
fn format_name(format: CompressionFormat) -> &'static str {
|
||||
match format {
|
||||
CompressionFormat::Gzip => "gzip",
|
||||
CompressionFormat::Zlib => "zlib",
|
||||
CompressionFormat::Zstd => "zstd",
|
||||
CompressionFormat::Brotli => "brotli",
|
||||
CompressionFormat::Deflate => "deflate",
|
||||
CompressionFormat::None => "uncompressed",
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Read;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn test_plain_writer() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_file = NamedTempFile::new()?;
|
||||
let path = temp_file.path();
|
||||
|
||||
{
|
||||
let mut writer = CompressionWriter::create(path, CompressionFormat::None)
|
||||
.map_err(|d| format!("{:?}", d))?;
|
||||
writeln!(writer, "hello world").map_err(|e| format!("{}", e))?;
|
||||
writer.finish().map_err(|d| format!("{:?}", d))?;
|
||||
}
|
||||
|
||||
let content = std::fs::read_to_string(path)?;
|
||||
assert_eq!(content, "hello world\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "compression")]
|
||||
fn test_gzip_writer() -> Result<(), Box<dyn std::error::Error>> {
|
||||
use flate2::read::GzDecoder;
|
||||
|
||||
let temp_file = NamedTempFile::new()?;
|
||||
let path = temp_file.path();
|
||||
|
||||
{
|
||||
let mut writer = CompressionWriter::create(path, CompressionFormat::Gzip)
|
||||
.map_err(|d| format!("{:?}", d))?;
|
||||
writeln!(writer, "hello gzip").map_err(|e| format!("{}", e))?;
|
||||
writer.finish().map_err(|d| format!("{:?}", d))?;
|
||||
}
|
||||
|
||||
// Verify by decompressing
|
||||
let file = File::open(path)?;
|
||||
let mut decoder = GzDecoder::new(file);
|
||||
let mut content = String::new();
|
||||
decoder.read_to_string(&mut content)?;
|
||||
assert_eq!(content, "hello gzip\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "compression")]
|
||||
fn test_zstd_writer() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let temp_file = NamedTempFile::new()?;
|
||||
let path = temp_file.path();
|
||||
|
||||
{
|
||||
let mut writer = CompressionWriter::create(path, CompressionFormat::Zstd)
|
||||
.map_err(|d| format!("{:?}", d))?;
|
||||
writeln!(writer, "hello zstd").map_err(|e| format!("{}", e))?;
|
||||
writer.finish().map_err(|d| format!("{:?}", d))?;
|
||||
}
|
||||
|
||||
// Verify by decompressing
|
||||
let file = File::open(path)?;
|
||||
let mut decoder = zstd::stream::read::Decoder::new(file)?;
|
||||
let mut content = String::new();
|
||||
decoder.read_to_string(&mut content)?;
|
||||
assert_eq!(content, "hello zstd\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_create_nonexistent_directory() {
|
||||
let result = CompressionWriter::create(
|
||||
Path::new("/nonexistent/directory/file.txt"),
|
||||
CompressionFormat::None,
|
||||
);
|
||||
match result {
|
||||
Ok(_) => panic!("Expected error for nonexistent directory"),
|
||||
Err(diagnostics) => {
|
||||
assert_eq!(diagnostics.len(), 1);
|
||||
// The error message should mention the path
|
||||
assert!(
|
||||
diagnostics[0]
|
||||
.description
|
||||
.contains("/nonexistent/directory/file.txt"),
|
||||
"Expected path in error message, got: {}",
|
||||
diagnostics[0].description
|
||||
);
|
||||
}
|
||||
}
|
||||
}
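// A sketch of a brotli round-trip in the same style as the gzip/zstd tests
// above. It assumes the brotli crate's Decompressor reader
// (brotli::Decompressor::new(reader, buffer_size)); treat it as an
// illustrative addition, not part of this commit.
#[test]
#[cfg(feature = "compression")]
fn test_brotli_writer_roundtrip_sketch() -> Result<(), Box<dyn std::error::Error>> {
    let temp_file = NamedTempFile::new()?;
    let path = temp_file.path();

    {
        let mut writer = CompressionWriter::create(path, CompressionFormat::Brotli)
            .map_err(|d| format!("{:?}", d))?;
        writeln!(writer, "hello brotli").map_err(|e| format!("{}", e))?;
        writer.finish().map_err(|d| format!("{:?}", d))?;
    }

    // Verify by decompressing
    let file = File::open(path)?;
    let mut decoder = brotli::Decompressor::new(file, 4096);
    let mut content = String::new();
    decoder.read_to_string(&mut content)?;
    assert_eq!(content, "hello brotli\n");
    Ok(())
}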
|
||||
}
|
||||
|
|
@ -91,7 +91,10 @@ pub fn is_json_archive<P: AsRef<Path>>(path: P) -> Result<bool, std::io::Error>
|
|||
|
||||
/// Create a buffered reader that handles decompression if needed.
|
||||
#[cfg(feature = "compression")]
|
||||
fn create_reader(file: File, compression: CompressionFormat) -> Result<Box<dyn BufRead>, std::io::Error> {
|
||||
fn create_reader(
|
||||
file: File,
|
||||
compression: CompressionFormat,
|
||||
) -> Result<Box<dyn BufRead>, std::io::Error> {
|
||||
Ok(match compression {
|
||||
CompressionFormat::Gzip => Box::new(BufReader::new(GzDecoder::new(file))),
|
||||
CompressionFormat::Deflate => Box::new(BufReader::new(DeflateDecoder::new(file))),
|
||||
|
|
@ -103,7 +106,10 @@ fn create_reader(file: File, compression: CompressionFormat) -> Result<Box<dyn B
|
|||
}
|
||||
|
||||
#[cfg(not(feature = "compression"))]
|
||||
fn create_reader(file: File, compression: CompressionFormat) -> Result<Box<dyn BufRead>, std::io::Error> {
|
||||
fn create_reader(
|
||||
file: File,
|
||||
compression: CompressionFormat,
|
||||
) -> Result<Box<dyn BufRead>, std::io::Error> {
|
||||
if compression != CompressionFormat::None {
|
||||
// Without compression support, we can't decompress to check the header.
|
||||
// Return false by returning an empty reader that will fail header check.
|
||||
|
|
@ -149,6 +155,19 @@ pub enum CompressionFormat {
|
|||
None,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for CompressionFormat {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
CompressionFormat::Gzip => write!(f, "gzip"),
|
||||
CompressionFormat::Deflate => write!(f, "deflate"),
|
||||
CompressionFormat::Zlib => write!(f, "zlib"),
|
||||
CompressionFormat::Brotli => write!(f, "brotli"),
|
||||
CompressionFormat::Zstd => write!(f, "zstd"),
|
||||
CompressionFormat::None => write!(f, "none"),
|
||||
}
|
||||
}
|
||||
}
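// A small check of the Display mapping above; this is the value the info
// command prints on its "Compression:" line. Illustrative test, not part of
// the commit itself.
#[cfg(test)]
mod compression_format_display_tests {
    use super::*;

    #[test]
    fn display_matches_format_names() {
        assert_eq!(CompressionFormat::Gzip.to_string(), "gzip");
        assert_eq!(CompressionFormat::Zstd.to_string(), "zstd");
        assert_eq!(CompressionFormat::None.to_string(), "none");
    }
}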
|
||||
|
||||
pub fn detect_compression_format(path: &Path, bytes: &[u8]) -> CompressionFormat {
|
||||
if bytes.len() < 4 {
|
||||
return CompressionFormat::None;
|
||||
|
|
@ -160,12 +179,19 @@ pub fn detect_compression_format(path: &Path, bytes: &[u8]) -> CompressionFormat
|
|||
}
|
||||
|
||||
// Zlib magic number: 0x78 followed by 0x01, 0x5e, 0x9c, or 0xda
|
||||
if bytes[0] == 0x78 && (bytes[1] == 0x01 || bytes[1] == 0x5e || bytes[1] == 0x9c || bytes[1] == 0xda) {
|
||||
if bytes[0] == 0x78
|
||||
&& (bytes[1] == 0x01 || bytes[1] == 0x5e || bytes[1] == 0x9c || bytes[1] == 0xda)
|
||||
{
|
||||
return CompressionFormat::Zlib;
|
||||
}
|
||||
|
||||
// Zstd magic number: 0x28 0xb5 0x2f 0xfd
|
||||
if bytes.len() >= 4 && bytes[0] == 0x28 && bytes[1] == 0xb5 && bytes[2] == 0x2f && bytes[3] == 0xfd {
|
||||
if bytes.len() >= 4
|
||||
&& bytes[0] == 0x28
|
||||
&& bytes[1] == 0xb5
|
||||
&& bytes[2] == 0x2f
|
||||
&& bytes[3] == 0xfd
|
||||
{
|
||||
return CompressionFormat::Zstd;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -219,6 +219,12 @@ impl Diagnostic {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<Diagnostic> for Vec<Diagnostic> {
|
||||
fn from(diagnostic: Diagnostic) -> Self {
|
||||
vec![diagnostic]
|
||||
}
|
||||
}
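// This From impl is what lets functions returning Result<_, Vec<Diagnostic>>
// apply `?` to a step that produces a single Diagnostic, as the new write
// command does when flushing its output. A minimal sketch (the function name
// and message here are illustrative):
#[allow(dead_code)]
fn flush_example(out: &mut dyn std::io::Write) -> Result<(), Vec<Diagnostic>> {
    out.flush().map_err(|e| {
        Diagnostic::fatal(
            DiagnosticCode::PathNotFound,
            format!("I couldn't flush the output: {}", e),
        )
    })?; // `?` converts the single Diagnostic into Vec<Diagnostic> through this impl
    Ok(())
}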
|
||||
|
||||
impl fmt::Display for Diagnostic {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
if let (Some(filename), Some(line)) = (&self.filename, self.line_number) {
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@
|
|||
//!
|
||||
//! Spent 30 minutes looking for existing solutions. Checked:
|
||||
//! - serde_path_to_error: Adds field path context but still returns string errors
|
||||
//! - figment: Configuration library, but sounded like could be used only for diagnostics
|
||||
//! - figment: Configuration library, but sounded like could be used only for diagnostics
|
||||
//! - config/serde_value: Similar issue
|
||||
//! - json5: Relaxed JSON syntax, not diagnostic-focused
|
||||
//! - miette: a diagnostic library for Rust. It includes a series of
|
||||
|
|
@ -63,10 +63,10 @@
|
|||
//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
|
||||
//! location information (filename, line number) after deserialization.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
|
||||
use serde_json::Value;
|
||||
use std::fmt;
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use crate::events::Event;
|
||||
|
|
@ -120,7 +120,7 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
A: SeqAccess<'de>,
|
||||
{
|
||||
let mut elements: Vec<Value> = Vec::new();
|
||||
|
||||
|
||||
while let Some(elem) = seq.next_element::<Value>()? {
|
||||
elements.push(elem);
|
||||
}
|
||||
|
|
@ -140,7 +140,8 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the first element of an event to be a string event type.".to_string(),
|
||||
"I expected the first element of an event to be a string event type."
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -152,7 +153,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected an observe event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected an observe event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -176,7 +180,8 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string."
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -215,7 +220,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected an add event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected an add event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -258,7 +266,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a change event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a change event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -301,7 +312,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a remove event to have 3 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a remove event to have 3 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -341,7 +355,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a move event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a move event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -394,7 +411,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a snapshot event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a snapshot event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -418,7 +438,8 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string."
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -476,14 +497,18 @@ impl EventVisitor {
|
|||
let from_idx = match pair[0].as_u64() {
|
||||
Some(i) => i as usize,
|
||||
None => {
|
||||
return Err("I expected the 'from' index to be a non-negative integer.".to_string());
|
||||
return Err(
|
||||
"I expected the 'from' index to be a non-negative integer.".to_string()
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let to_idx = match pair[1].as_u64() {
|
||||
Some(i) => i as usize,
|
||||
None => {
|
||||
return Err("I expected the 'to' index to be a non-negative integer.".to_string());
|
||||
return Err(
|
||||
"I expected the 'to' index to be a non-negative integer.".to_string()
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -503,7 +528,7 @@ mod tests {
|
|||
fn test_deserialize_observe_event() {
|
||||
let json = json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert!(deserializer.diagnostics.is_empty());
|
||||
|
|
@ -518,7 +543,7 @@ mod tests {
|
|||
fn test_deserialize_add_event() {
|
||||
let json = json!(["add", "/count", 42, "obs-1"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert!(deserializer.diagnostics.is_empty());
|
||||
|
|
@ -533,11 +558,14 @@ mod tests {
|
|||
fn test_deserialize_invalid_event_type() {
|
||||
let json = json!(["invalid", "some", "data"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert_eq!(deserializer.diagnostics.len(), 1);
|
||||
assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::UnknownEventType);
|
||||
assert_eq!(
|
||||
deserializer.diagnostics[0].code,
|
||||
DiagnosticCode::UnknownEventType
|
||||
);
|
||||
assert!(deserializer.event.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -545,11 +573,14 @@ mod tests {
|
|||
fn test_deserialize_wrong_field_count() {
|
||||
let json = json!(["observe", "obs-1"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert_eq!(deserializer.diagnostics.len(), 1);
|
||||
assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::WrongFieldCount);
|
||||
assert_eq!(
|
||||
deserializer.diagnostics[0].code,
|
||||
DiagnosticCode::WrongFieldCount
|
||||
);
|
||||
assert!(deserializer.event.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -557,7 +588,7 @@ mod tests {
|
|||
fn test_deserialize_move_event() {
|
||||
let json = json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert!(deserializer.diagnostics.is_empty());
|
||||
|
|
@ -567,4 +598,4 @@ mod tests {
|
|||
if path == "/items" && moves == vec![(0, 2), (1, 0)] && observation_id == "obs-1"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,7 +89,6 @@ pub enum Event {
|
|||
},
|
||||
}
|
||||
|
||||
|
||||
impl Serialize for Event {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ use std::path::PathBuf;
|
|||
|
||||
xflags::xflags! {
|
||||
cmd json-archive {
|
||||
default cmd create {
|
||||
default cmd write {
|
||||
/// Input JSON files in chronological order. If first file is a .json.archive file,
|
||||
/// appends remaining files to it. Otherwise creates a new archive from all files.
|
||||
repeated inputs: PathBuf
|
||||
|
|
|
|||
14 src/lib.rs
|
|
@ -19,12 +19,11 @@
|
|||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
pub mod archive_context;
|
||||
pub mod archive_open;
|
||||
pub mod archive_ops;
|
||||
pub mod archive_reader;
|
||||
pub mod archive_writer;
|
||||
pub mod atomic_file;
|
||||
pub mod compression_writer;
|
||||
pub mod detection;
|
||||
pub mod diagnostics;
|
||||
pub mod diff;
|
||||
|
|
@ -33,12 +32,17 @@ pub mod events;
|
|||
pub mod flags;
|
||||
pub mod pointer;
|
||||
mod pointer_errors;
|
||||
pub mod write_strategy;
|
||||
|
||||
pub use archive_writer::{
|
||||
append_to_archive, create_archive_from_files, default_output_filename, ArchiveBuilder, ArchiveWriter,
|
||||
pub use archive_reader::{
|
||||
apply_add, apply_change, apply_move, apply_remove, read_archive, read_events, EventIterator,
|
||||
ReadMode, ReadResult,
|
||||
};
|
||||
pub use archive_writer::{default_output_filename, write_observation, ArchiveWriter};
|
||||
pub use detection::is_json_archive;
|
||||
pub use diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
|
||||
pub use events::{Event, Header, Observation};
|
||||
pub use pointer::JsonPointer;
|
||||
pub use archive_reader::{apply_add, apply_change, apply_move, apply_remove, ArchiveReader, ReadMode, ReadResult};
|
||||
pub use write_strategy::{
|
||||
compression_from_extension, determine_strategy, CompressedPath, WriteStrategy,
|
||||
};
|
||||
|
|
|
|||
171 src/main.rs
|
|
@ -19,9 +19,7 @@
|
|||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
use json_archive::archive_ops::{append_to_archive, create_archive, default_output_filename};
|
||||
use json_archive::{is_json_archive, Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use std::path::Path;
|
||||
use json_archive::Diagnostic;
|
||||
use std::process;
|
||||
|
||||
mod cmd;
|
||||
|
|
@ -30,169 +28,22 @@ mod flags;
|
|||
fn main() {
|
||||
let flags = flags::JsonArchive::from_env_or_exit();
|
||||
|
||||
let diagnostics = run(flags);
|
||||
if let Err(diagnostics) = run(flags) {
|
||||
for diagnostic in &diagnostics {
|
||||
eprintln!("{}", diagnostic);
|
||||
}
|
||||
|
||||
for diagnostic in &diagnostics {
|
||||
eprintln!("{}", diagnostic);
|
||||
}
|
||||
|
||||
let has_fatal = diagnostics.iter().any(|d| d.is_fatal());
|
||||
if has_fatal {
|
||||
process::exit(1);
|
||||
let has_fatal = diagnostics.iter().any(|d| d.is_fatal());
|
||||
if has_fatal {
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn run(flags: flags::JsonArchive) -> Vec<Diagnostic> {
|
||||
fn run(flags: flags::JsonArchive) -> Result<(), Vec<Diagnostic>> {
|
||||
match flags.subcommand {
|
||||
flags::JsonArchiveCmd::Create(create_flags) => run_create(&create_flags),
|
||||
flags::JsonArchiveCmd::Write(write_flags) => cmd::write::run(&write_flags),
|
||||
flags::JsonArchiveCmd::Info(info_flags) => cmd::info::run(&info_flags),
|
||||
flags::JsonArchiveCmd::State(state_flags) => cmd::state::run(&state_flags),
|
||||
}
|
||||
}
|
||||
|
||||
struct ParsedCreateArgs {
|
||||
destination: std::path::PathBuf,
|
||||
input_files: Vec<std::path::PathBuf>,
|
||||
}
|
||||
|
||||
/// Parse the create command arguments to determine the destination archive and input files.
|
||||
/// This consolidates all the inferring behavior in one place.
|
||||
fn parse_create_args(flags: &flags::Create) -> Result<ParsedCreateArgs, Vec<Diagnostic>> {
|
||||
if flags.inputs.is_empty() {
|
||||
return Err(vec![Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"I need at least one JSON file to create an archive, but you didn't provide any."
|
||||
.to_string(),
|
||||
)
|
||||
.with_advice(
|
||||
"Usage: json-archive <file1.json> [file2.json ...]\n\n\
|
||||
The first file will be used as the initial state, and subsequent files \
|
||||
will be compared to generate change events."
|
||||
.to_string(),
|
||||
)]);
|
||||
}
|
||||
|
||||
// Determine the destination archive path
|
||||
let destination = if let Some(output) = &flags.output {
|
||||
// Explicitly specified output path
|
||||
output.clone()
|
||||
} else if Path::new(&flags.inputs[0]).exists()
|
||||
&& is_json_archive(&flags.inputs[0]).unwrap_or(false)
|
||||
{
|
||||
// First input is an existing archive - use it as destination
|
||||
flags.inputs[0].clone()
|
||||
} else {
|
||||
// Infer from first input
|
||||
default_output_filename(&flags.inputs[0])
|
||||
};
|
||||
|
||||
// Filter out the destination from input files to avoid read-write conflicts
|
||||
let input_files: Vec<_> = flags.inputs
|
||||
.iter()
|
||||
.filter(|path| {
|
||||
match (std::fs::canonicalize(path).ok(), std::fs::canonicalize(&destination).ok()) {
|
||||
(Some(p), Some(d)) => p != d,
|
||||
_ => true, // Include if canonicalization fails (file doesn't exist yet)
|
||||
}
|
||||
})
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
if input_files.is_empty() {
|
||||
return Err(vec![
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::MissingHeaderField,
|
||||
"No input files remain after filtering out the destination archive.".to_string()
|
||||
)
|
||||
.with_advice(
|
||||
"You specified the output path in the list of input files. This would cause a read-write conflict.\n\
|
||||
Either remove the output path from inputs, or use a different output path with -o."
|
||||
.to_string()
|
||||
)
|
||||
]);
|
||||
}
|
||||
|
||||
// Validate all input files exist
|
||||
let mut diagnostics = Vec::new();
|
||||
for input_path in &input_files {
|
||||
if !Path::new(input_path).exists() {
|
||||
diagnostics.push(
|
||||
Diagnostic::new(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::PathNotFound,
|
||||
format!("I couldn't find the input file: {}", input_path.display()),
|
||||
)
|
||||
.with_advice(
|
||||
"Make sure the file path is correct and the file exists. \
|
||||
Check for typos in the filename."
|
||||
.to_string(),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !diagnostics.is_empty() {
|
||||
return Err(diagnostics);
|
||||
}
|
||||
|
||||
Ok(ParsedCreateArgs {
|
||||
destination,
|
||||
input_files,
|
||||
})
|
||||
}
|
||||
|
||||
fn run_create(flags: &flags::Create) -> Vec<Diagnostic> {
|
||||
let parsed = match parse_create_args(flags) {
|
||||
Ok(parsed) => parsed,
|
||||
Err(diagnostics) => return diagnostics,
|
||||
};
|
||||
|
||||
if let Some(interval) = flags.snapshot_interval {
|
||||
println!("Snapshot interval: every {} observations", interval);
|
||||
}
|
||||
|
||||
if let Some(ref source) = flags.source {
|
||||
println!("Source: {}", source);
|
||||
}
|
||||
|
||||
// If destination exists and is an archive, append to it
|
||||
if Path::new(&parsed.destination).exists() {
|
||||
if let Ok(true) = is_json_archive(&parsed.destination) {
|
||||
println!("Appending to existing archive: {}", parsed.destination.display());
|
||||
println!("Input files: {:?}", parsed.input_files);
|
||||
|
||||
let diagnostics = append_to_archive(
|
||||
&parsed.destination,
|
||||
&parsed.input_files,
|
||||
&parsed.destination,
|
||||
flags.source.clone(),
|
||||
flags.snapshot_interval,
|
||||
);
|
||||
|
||||
if diagnostics.is_empty() {
|
||||
println!("Archive updated successfully: {}", parsed.destination.display());
|
||||
}
|
||||
|
||||
return diagnostics;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise create a new archive from the input files
|
||||
println!("Creating new archive: {}", parsed.destination.display());
|
||||
println!("Input files: {:?}", parsed.input_files);
|
||||
|
||||
let diagnostics = create_archive(
|
||||
&parsed.input_files,
|
||||
parsed.destination.clone(),
|
||||
flags.source.clone(),
|
||||
flags.snapshot_interval,
|
||||
);
|
||||
|
||||
if diagnostics.is_empty() {
|
||||
println!("Archive created successfully: {}", parsed.destination.display());
|
||||
}
|
||||
|
||||
diagnostics
|
||||
}
|
||||
|
@ -390,7 +390,12 @@ mod tests {
    #[test]
    fn test_type_mismatch_error_output() {
        let tokens = vec!["users".to_string(), "0".to_string(), "email".to_string(), "domain".to_string()];
        let tokens = vec![
            "users".to_string(),
            "0".to_string(),
            "email".to_string(),
            "domain".to_string(),
        ];
        let current = Value::String("alice@example.com".to_string());
        let diag = build_type_mismatch_error(&tokens, 3, "domain", &current);
352
src/write_strategy.rs
Normal file
@ -0,0 +1,352 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//

//! Write strategy for archive operations.
//!
//! There are exactly two questions:
//! 1. Where do we write? (dest_path)
//! 2. Can we write there directly, or do we need to dance?
//!
//! The dance (temp file + atomic swap) is required when:
//! - source_path == dest_path, AND
//! - the file is compressed
//!
//! Why? Compressed streams don't support append. To add one
//! record to a gzip file, you decompress everything, add the
//! record, recompress everything. If you write to the same
//! file you're reading, you corrupt it mid-operation.
//!
//! So: write to temp, swap when done. See atomic_file.rs.
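//!
//! A minimal sketch of that dance (only `generate_temp_filename` and
//! `std::fs::rename` are real names here; the rewrite step is a made-up
//! placeholder, and the real sequence lives in atomic_file.rs):
//!
//! ```text
//! let temp = generate_temp_filename(dest);   // e.g. .A.json.archive.gz.<suffix>
//! rewrite_whole_archive_into(&temp)?;        // decompress, append, recompress
//! std::fs::rename(&temp, dest)?;             // atomic swap on the same filesystem
//! ```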
//!
//! When source != dest, there is no conflict. Read from source,
//! write to dest. Even if source is compressed. Even if dest
//! is compressed. Even if they use different compression.
//! The source is never modified.
//!
//! When source == dest AND uncompressed, just append. Seek to
//! end, write new records. Simple.
//!
//! The output compression format is determined by dest_path's
//! extension, not the source's format. That's a separate concern.
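//!
//! For example, the extension mapping implemented below in
//! `compression_from_extension` is:
//!
//! ```text
//! out.json.archive       -> CompressionFormat::None
//! out.json.archive.gz    -> CompressionFormat::Gzip
//! out.json.archive.br    -> CompressionFormat::Brotli
//! out.json.archive.zst   -> CompressionFormat::Zstd
//! out.json.archive.zlib  -> CompressionFormat::Zlib
//! ```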
//!
//! ## Truth Table
//!
//! ```text
//! INPUTS                    OUTPUT FLAG       STRATEGY
//! ───────────────────────────────────────────────────────────────
//! [A.json, B.json]          (none)            Create { out: A.json.archive, fmt: None }
//! [A.json, B.json]          -o X.archive.gz   Create { out: X.archive.gz, fmt: Gzip }
//!
//! [A.archive, B.json]       (none)            Append { path: A.archive }
//! [A.archive, B.json]       -o X.archive      CopyOnWrite { in: A.archive/None, out: X.archive/None }
//!
//! [A.archive.gz, B.json]    (none)            AtomicSwap { path: A.archive.gz, fmt: Gzip, temp: .A.archive.gz.xxx }
//! [A.archive.gz, B.json]    -o A.archive.gz   AtomicSwap { path: A.archive.gz, fmt: Gzip, temp: .A.archive.gz.xxx }
//! [A.archive.gz, B.json]    -o X.archive      CopyOnWrite { in: A.archive.gz/Gzip, out: X.archive/None }
//! [A.archive.gz, B.json]    -o X.archive.br   CopyOnWrite { in: A.archive.gz/Gzip, out: X.archive.br/Brotli }
//! ```
//!
//! The rule:
//! ```text
//! if creating new archive:
//!     Create
//! else if source != dest:
//!     CopyOnWrite (read from source, write to dest, transcoding as needed)
//! else if source == dest AND uncompressed:
//!     Append (seek to end, write)
//! else if source == dest AND compressed:
//!     AtomicSwap (read all, write to temp, swap)
//! ```
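//!
//! A usage sketch of `determine_strategy` (defined below; the paths are
//! invented and assumed to exist where the comments say so):
//!
//! ```text
//! // Append to a gzip archive in place (source == dest, file exists):
//! determine_strategy(Some(Path::new("A.json.archive.gz")),
//!                    Path::new("A.json.archive.gz"),
//!                    CompressionFormat::Gzip)
//! // -> AtomicSwap { path, compression: Gzip, temp_path: .A.json.archive.gz.<suffix> }
//!
//! // Transcode into a brotli copy; the gzip source is never modified:
//! determine_strategy(Some(Path::new("A.json.archive.gz")),
//!                    Path::new("X.json.archive.br"),
//!                    CompressionFormat::Gzip)
//! // -> CopyOnWrite { input: (A.json.archive.gz, Gzip), output: (X.json.archive.br, Brotli) }
//! ```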

use std::path::{Path, PathBuf};

use crate::atomic_file::generate_temp_filename;
use crate::detection::CompressionFormat;

/// A path with its compression format.
pub type CompressedPath = (PathBuf, CompressionFormat);

/// Describes how to write archive data based on input/output paths and compression.
#[derive(Debug, Clone)]
pub enum WriteStrategy {
    /// Create a new archive from scratch. No existing archive to read.
    Create { output: CompressedPath },

    /// Append to an existing uncompressed archive in-place.
    /// Just seek to end and write new records.
    Append { path: PathBuf },

    /// Read from one location, write to another.
    /// Handles transcoding between compression formats.
    CopyOnWrite {
        input: CompressedPath,
        output: CompressedPath,
    },

    /// Read compressed archive, write to temp, atomic swap.
    /// Required when source == dest AND compressed.
    AtomicSwap {
        /// The archive path (both input and output)
        path: PathBuf,
        /// Compression format (same for input and output in this case)
        compression: CompressionFormat,
        /// Temp file to write to before swapping
        temp_path: PathBuf,
    },
}

/// Determine compression format from file extension.
///
/// Returns `CompressionFormat::None` for uncompressed files.
pub fn compression_from_extension(path: &Path) -> CompressionFormat {
    let s = path.to_string_lossy();
    if s.ends_with(".gz") {
        CompressionFormat::Gzip
    } else if s.ends_with(".br") {
        CompressionFormat::Brotli
    } else if s.ends_with(".zst") {
        CompressionFormat::Zstd
    } else if s.ends_with(".zlib") {
        CompressionFormat::Zlib
    } else {
        CompressionFormat::None
    }
}

/// Determine write strategy from parsed arguments.
///
/// # Arguments
///
/// * `source_archive` - Path to existing archive if appending, None if creating new
/// * `dest_path` - Where to write the output
/// * `source_compression` - Compression format of source (from magic bytes). Pass
///   `CompressionFormat::None` if unknown or uncompressed.
///
/// # Returns
///
/// The appropriate `WriteStrategy` for this operation.
pub fn determine_strategy(
    source_archive: Option<&Path>,
    dest_path: &Path,
    source_compression: CompressionFormat,
) -> WriteStrategy {
    let dest_compression = compression_from_extension(dest_path);

    // No source archive? Creating new.
    let Some(source) = source_archive else {
        return WriteStrategy::Create {
            output: (dest_path.to_path_buf(), dest_compression),
        };
    };

    // Check if source and dest are the same file
    let same_file = match (source.canonicalize(), dest_path.canonicalize()) {
        (Ok(s), Ok(d)) => s == d,
        // dest doesn't exist yet, or other error - not same file
        _ => false,
    };

    if !same_file {
        // Different files: read from source, write to dest
        let source_fmt = if source_compression == CompressionFormat::None {
            compression_from_extension(source)
        } else {
            source_compression
        };
        return WriteStrategy::CopyOnWrite {
            input: (source.to_path_buf(), source_fmt),
            output: (dest_path.to_path_buf(), dest_compression),
        };
    }

    // Same file - check if compressed
    let compression = if source_compression == CompressionFormat::None {
        compression_from_extension(source)
    } else {
        source_compression
    };

    match compression {
        CompressionFormat::None => {
            // Uncompressed: can append in-place
            WriteStrategy::Append {
                path: dest_path.to_path_buf(),
            }
        }
        fmt => {
            // Compressed: need atomic swap
            WriteStrategy::AtomicSwap {
                path: dest_path.to_path_buf(),
                compression: fmt,
                temp_path: generate_temp_filename(dest_path),
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as IoWrite;
    use tempfile::NamedTempFile;

    #[test]
    fn test_compression_from_extension() {
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.gz")),
            CompressionFormat::Gzip
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.br")),
            CompressionFormat::Brotli
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.zst")),
            CompressionFormat::Zstd
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive.zlib")),
            CompressionFormat::Zlib
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json.archive")),
            CompressionFormat::None
        );
        assert_eq!(
            compression_from_extension(Path::new("foo.json")),
            CompressionFormat::None
        );
    }

    #[test]
    fn test_create_new_archive() {
        let dest = Path::new("/tmp/new.json.archive");
        let strategy = determine_strategy(None, dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::Create { output } => {
                assert_eq!(output.0, PathBuf::from("/tmp/new.json.archive"));
                assert_eq!(output.1, CompressionFormat::None);
            }
            _ => panic!("Expected Create strategy"),
        }
    }

    #[test]
    fn test_create_new_compressed_archive() {
        let dest = Path::new("/tmp/new.json.archive.gz");
        let strategy = determine_strategy(None, dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::Create { output } => {
                assert_eq!(output.0, PathBuf::from("/tmp/new.json.archive.gz"));
                assert_eq!(output.1, CompressionFormat::Gzip);
            }
            _ => panic!("Expected Create strategy"),
        }
    }

    #[test]
    fn test_append_uncompressed_same_file() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp = NamedTempFile::with_suffix(".json.archive")?;
        writeln!(temp, "test")?;
        temp.flush()?;

        let path = temp.path();
        let strategy = determine_strategy(Some(path), path, CompressionFormat::None);

        match strategy {
            WriteStrategy::Append { path: p } => {
                assert_eq!(p, path);
            }
            _ => panic!("Expected Append strategy, got {:?}", strategy),
        }

        Ok(())
    }

    #[test]
    fn test_atomic_swap_compressed_same_file() -> Result<(), Box<dyn std::error::Error>> {
        let mut temp = NamedTempFile::with_suffix(".json.archive.gz")?;
        writeln!(temp, "test")?;
        temp.flush()?;

        let path = temp.path();
        let strategy = determine_strategy(Some(path), path, CompressionFormat::Gzip);

        match strategy {
            WriteStrategy::AtomicSwap {
                path: p,
                compression,
                temp_path,
            } => {
                assert_eq!(p, path);
                assert_eq!(compression, CompressionFormat::Gzip);
                assert!(temp_path.to_string_lossy().contains(".json.archive.gz"));
            }
            _ => panic!("Expected AtomicSwap strategy, got {:?}", strategy),
        }

        Ok(())
    }

    #[test]
    fn test_direct_different_files() -> Result<(), Box<dyn std::error::Error>> {
        let mut source = NamedTempFile::with_suffix(".json.archive")?;
        writeln!(source, "test")?;
        source.flush()?;

        let dest = Path::new("/tmp/different.json.archive");
        let strategy = determine_strategy(Some(source.path()), dest, CompressionFormat::None);

        match strategy {
            WriteStrategy::CopyOnWrite { input, output } => {
                assert_eq!(input.0, source.path());
                assert_eq!(input.1, CompressionFormat::None);
                assert_eq!(output.0, PathBuf::from("/tmp/different.json.archive"));
                assert_eq!(output.1, CompressionFormat::None);
            }
            _ => panic!("Expected Direct strategy, got {:?}", strategy),
        }

        Ok(())
    }

    #[test]
    fn test_direct_transcode_compression() -> Result<(), Box<dyn std::error::Error>> {
        let mut source = NamedTempFile::with_suffix(".json.archive.gz")?;
        writeln!(source, "test")?;
        source.flush()?;

        let dest = Path::new("/tmp/output.json.archive.br");
        let strategy = determine_strategy(Some(source.path()), dest, CompressionFormat::Gzip);

        match strategy {
            WriteStrategy::CopyOnWrite { input, output } => {
                assert_eq!(input.1, CompressionFormat::Gzip);
                assert_eq!(output.1, CompressionFormat::Brotli);
            }
            _ => panic!("Expected Direct strategy, got {:?}", strategy),
        }

        Ok(())
    }
}
@ -1,64 +1,94 @@
// Integration tests for compressed archive functionality

use json_archive::{append_to_archive, ArchiveWriter, Header};
use json_archive::{ArchiveReader, ReadMode};
use serde_json::json;
use std::io::Write;
use json_archive::archive_open::open_archive;
use json_archive::write_observation;
use json_archive::{read_archive, ReadMode};
use serde_json::{json, Value};
use std::fs::File;
use std::io::{BufWriter, Write};
use tempfile::NamedTempFile;

#[test]
#[cfg(feature = "compression")]
fn test_append_to_compressed_archive_basic() -> Result<(), Box<dyn std::error::Error>> {
fn test_append_to_compressed_archive_basic() {
    use flate2::write::GzEncoder;
    use flate2::Compression;

    // Create initial archive
    let archive_file = NamedTempFile::with_suffix(".json.archive")?;
    let header = Header::new(json!({"count": 0}), Some("test".to_string()));
    // Create initial archive with one state
    let initial_state = create_json_file(&json!({"count": 0}));
    let archive_file = NamedTempFile::with_suffix(".json.archive").unwrap();

    #[allow(unused_assignments)]
    {
        let mut writer = ArchiveWriter::new(archive_file.path(), None)
            .map_err(|e| format!("Failed to create writer: {:?}", e))?;
        writer.write_header(&header)
            .map_err(|e| format!("Failed to write header: {:?}", e))?;
        writer.finish()
            .map_err(|e| format!("Failed to finish: {:?}", e))?;
        let file = File::create(archive_file.path()).unwrap();
        let mut writer = BufWriter::new(file);
        let mut current_state = Value::Null;
        let mut observation_count: usize = 0;

        current_state = write_observation(
            &mut writer,
            &mut observation_count,
            None,
            &current_state,
            &initial_state.path().to_path_buf(),
            Some("test".to_string()),
        )
        .unwrap();

        writer.flush().unwrap();
    }

    dump_file(archive_file.path(), "Uncompressed archive");

    // Compress it
    let compressed_file = NamedTempFile::with_suffix(".json.archive.gz")?;
    let compressed_file = NamedTempFile::with_suffix(".json.archive.gz").unwrap();
    {
        let input = std::fs::read(archive_file.path())?;
        let input = std::fs::read(archive_file.path()).unwrap();
        let mut encoder = GzEncoder::new(
            compressed_file.as_file().try_clone()?,
            Compression::default()
            compressed_file.as_file().try_clone().unwrap(),
            Compression::default(),
        );
        encoder.write_all(&input)?;
        encoder.finish()?;
        encoder.write_all(&input).unwrap();
        encoder.finish().unwrap();
    }

    // Create a new state file to append
    let mut state_file = NamedTempFile::new()?;
    writeln!(state_file, r#"{{"count": 1}}"#)?;
    state_file.flush()?;
    dump_file(compressed_file.path(), "Compressed archive");

    // Append to compressed archive
    let diagnostics = append_to_archive(
        compressed_file.path(),
        &[state_file.path()],
        compressed_file.path(),
        None,
        None,
    );
    // Verify the compressed archive can be read
    let opened = open_archive(compressed_file.path()).unwrap();
    let result = read_archive(
        opened.reader,
        &compressed_file.path().display().to_string(),
        ReadMode::FullValidation,
    )
    .unwrap();

    // Should succeed with no diagnostics
    assert!(diagnostics.is_empty(), "Got diagnostics: {:?}", diagnostics);
    eprintln!("=== Reader result ===");
    eprintln!("final_state: {:?}", result.final_state);
    eprintln!("observation_count: {}", result.observation_count);
    eprintln!("diagnostics: {:?}", result.diagnostics);
    eprintln!();

    // Verify the archive was updated (decompressed)
    let reader = ArchiveReader::new(compressed_file.path(), ReadMode::FullValidation)?;
    let result = reader.read(compressed_file.path())?;
    assert_eq!(result.final_state, json!({"count": 1}));
    assert_eq!(result.observation_count, 1);

    Ok(())
    assert_eq!(result.final_state, json!({"count": 0}));
    assert_eq!(result.observation_count, 0);
}

/// Helper to create a temp file with JSON content
fn create_json_file(content: &Value) -> NamedTempFile {
    let mut file = NamedTempFile::new().expect("Failed to create temp file");
    writeln!(file, "{}", serde_json::to_string(content).unwrap()).unwrap();
    file
}

/// Debug helper: print file contents as both hex and text
fn dump_file(path: &std::path::Path, label: &str) {
    let bytes = std::fs::read(path).unwrap();
    eprintln!("=== {} ({} bytes) ===", label, bytes.len());
    eprintln!("Hex: {:02x?}", &bytes[..bytes.len().min(100)]);
    if let Ok(text) = std::str::from_utf8(&bytes) {
        eprintln!("Text:\n{}", &text[..text.len().min(500)]);
    } else {
        eprintln!("(not valid UTF-8)");
    }
    eprintln!();
}
@ -44,11 +44,11 @@ echo "Final archive info:"
# Decompress for manual inspection
echo ""
echo "Decompressing for comparison..."
brotli -d -k "$OUT_DIR/test.json.archive.br"
brotli -d -k "$OUT_DIR/test.json.archive.br" -o "$OUT_DIR/test-decompressed.json.archive"

echo ""
echo "Decompressed archive info:"
"$BINARY" info "$OUT_DIR/test.json.archive"
"$BINARY" info "$OUT_DIR/test-decompressed.json.archive"

echo ""
echo "Files in $OUT_DIR:"