refactor: decompose archive read/write into composable building blocks
Delete archive_context.rs and archive_ops.rs (1200+ lines of duplicated logic). Replace with four focused modules:

1. open_archive() - opens a file, detects compression, returns raw bytes
2. read_archive() - parses bytes into validated observations
3. CompressionWriter - writes bytes with any compression format
4. WriteStrategy - given a list of files, determines input archive, output archive, output format, and which of four write modes to use:
   - Create: new archive, no input
   - Append: uncompressed input, seek to end
   - AtomicSwap: compressed input, rewrite via temp file
   - CopyOnWrite: different input/output paths, transcode between formats

Previously you could not specify an output format: appending always preserved the input format, and creating compressed archives didn't work. Now all four cases work with any supported compression format. Atomic swap now writes to a temp file, then renames, making it crash-safe (a sketch of the mode selection and the rename follows this message).

Trade-off: this approach prioritizes code clarity over syscall efficiency. The archive file may be opened and read multiple times during a single operation (once for format detection, once for reading state, once for copying content). A more optimized implementation could reuse file handles, but the current approach makes each step's purpose obvious.
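For illustration, a minimal sketch of the mode selection and the atomic rename described above. The four variant names and their meanings are quoted from this message; everything else (the plan_write helper, its parameters, the temp-file naming) is hypothetical and not the actual WriteStrategy API:

    use std::fs;
    use std::io::Write;
    use std::path::Path;

    // The four write modes named above; comments quote the selection rules.
    enum WriteMode {
        Create,      // new archive, no input
        Append,      // uncompressed input, seek to end
        AtomicSwap,  // compressed input, rewrite via temp file
        CopyOnWrite, // different input/output paths, transcode between formats
    }

    // Hypothetical selection helper mirroring the rules listed above.
    fn plan_write(input: Option<&Path>, output: &Path, input_compressed: bool) -> WriteMode {
        match input {
            None => WriteMode::Create,
            Some(input) if input != output => WriteMode::CopyOnWrite,
            Some(_) if input_compressed => WriteMode::AtomicSwap,
            Some(_) => WriteMode::Append,
        }
    }

    // Crash safety for AtomicSwap: write the complete new archive to a temp
    // file, flush it, then rename over the original. A crash before the
    // rename leaves the old archive intact; the rename itself is atomic on
    // POSIX when the temp file is on the same filesystem.
    fn atomic_swap(target: &Path, new_bytes: &[u8]) -> std::io::Result<()> {
        let tmp = target.with_extension("tmp"); // hypothetical temp naming
        let mut file = fs::File::create(&tmp)?;
        file.write_all(new_bytes)?;
        file.sync_all()?;
        fs::rename(&tmp, target)?;
        Ok(())
    }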
parent a760114ffe
commit b65103c9f7
28 changed files with 2055 additions and 2859 deletions
@@ -21,7 +21,9 @@
 use crate::flags;
 use chrono::{DateTime, Utc};
-use json_archive::{Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
+use json_archive::archive_open::open_archive;
+use json_archive::detection::CompressionFormat;
+use json_archive::{read_events, Diagnostic, DiagnosticCode, DiagnosticLevel, Event};
 use serde::Serialize;
 use std::path::Path;
 
 
@@ -46,6 +48,7 @@ struct JsonObservation {
 #[derive(Serialize)]
 struct JsonInfoOutput {
     archive: String,
+    compression: String,
     created: String,
     file_size: u64,
     snapshot_count: usize,
@@ -54,9 +57,9 @@ struct JsonInfoOutput {
     efficiency_percent: f64,
 }
 
-pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
+pub fn run(flags: &flags::Info) -> Result<(), Vec<Diagnostic>> {
     if !flags.file.exists() {
-        return vec![Diagnostic::new(
+        return Err(vec![Diagnostic::new(
             DiagnosticLevel::Fatal,
             DiagnosticCode::PathNotFound,
             format!("I couldn't find the archive file: {}", flags.file.display()),
@@ -65,12 +68,13 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             "Make sure the file path is correct and the file exists. \
              Check for typos in the filename."
                 .to_string(),
-        )];
+        )]);
     }
 
-    let (observations, snapshot_count) = match collect_observations(&flags.file) {
-        Ok((obs, count)) => (obs, count),
-        Err(diagnostics) => return diagnostics,
+    let (observations, snapshot_count, compression_format) = match collect_observations(&flags.file)
+    {
+        Ok((obs, count, format)) => (obs, count, format),
+        Err(diagnostics) => return Err(diagnostics),
     };
 
     let file_size = match std::fs::metadata(&flags.file) {
@@ -79,7 +83,10 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     };
 
     // Calculate total JSON size (sum of all observations + newline separators)
-    let total_json_size: u64 = observations.iter().map(|obs| obs.json_size as u64).sum::<u64>()
+    let total_json_size: u64 = observations
+        .iter()
+        .map(|obs| obs.json_size as u64)
+        .sum::<u64>()
         + (observations.len() as u64).saturating_sub(1); // Add newlines between observations
 
     let efficiency_percent = if total_json_size > 0 {
@@ -96,6 +103,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     if observations.is_empty() {
         let empty_output = JsonInfoOutput {
             archive: flags.file.display().to_string(),
+            compression: compression_format.to_string(),
             created: "".to_string(),
             file_size,
             snapshot_count,
@@ -107,7 +115,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             "{}",
             serde_json::to_string_pretty(&empty_output).unwrap_or_default()
         );
-        return Vec::new();
+        return Ok(());
     }
 
     let json_observations: Vec<JsonObservation> = observations
@@ -128,6 +136,7 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
 
     let json_output = JsonInfoOutput {
         archive: flags.file.display().to_string(),
+        compression: compression_format.to_string(),
         created: observations[0].created.to_rfc3339(),
         file_size,
         snapshot_count,
@@ -143,10 +152,11 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
     } else {
         // Human-readable output mode
         println!("Archive: {}", flags.file.display());
+        println!("Compression: {}", compression_format);
 
         if observations.is_empty() {
             println!("No observations found");
-            return Vec::new();
+            return Ok(());
         }
 
         let first_timestamp = &observations[0].created;
@@ -217,56 +227,26 @@ pub fn run(flags: &flags::Info) -> Vec<Diagnostic> {
             snapshot_text,
             comparison
         );
-        println!(
-            "Data size: {}",
-            format_size(total_json_size)
-        );
+        println!("Data size: {}", format_size(total_json_size));
 
         // Add usage instructions
         println!();
         println!("To get the JSON value at a specific observation:");
-        println!("  json-archive state --index <#> {}", flags.file.display());
-        println!(
-            "  json-archive state --id <observation-id> {}",
-            flags.file.display()
-        );
-        println!();
-        println!("Examples:");
-        println!(
-            "  json-archive state --index 0 {}  # Get initial state",
-            flags.file.display()
-        );
-        println!(
-            "  json-archive state --index 2 {}  # Get state after observation 2",
-            flags.file.display()
-        );
+        println!("  json-archive state --index <#> <archive>");
+        println!("  json-archive state --id <observation-id> <archive>");
     }
 
-    Vec::new()
+    Ok(())
 }
 
-fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize), Vec<Diagnostic>> {
-    let reader = match json_archive::ArchiveReader::new(file_path, json_archive::ReadMode::AppendSeek) {
-        Ok(r) => r,
-        Err(e) => {
-            return Err(vec![Diagnostic::new(
-                DiagnosticLevel::Fatal,
-                DiagnosticCode::PathNotFound,
-                format!("I couldn't open the archive file: {}", e),
-            )]);
-        }
-    };
+fn collect_observations(
+    file_path: &Path,
+) -> Result<(Vec<ObservationInfo>, usize, CompressionFormat), Vec<Diagnostic>> {
+    let opened = open_archive(file_path)?;
+    let compression_format = opened.format;
 
-    let (initial_state, mut event_iter) = match reader.events(file_path) {
-        Ok(r) => r,
-        Err(e) => {
-            return Err(vec![Diagnostic::new(
-                DiagnosticLevel::Fatal,
-                DiagnosticCode::PathNotFound,
-                format!("I couldn't read the archive file: {}", e),
-            )]);
-        }
-    };
+    let (initial_state, mut event_iter) =
+        read_events(opened.reader, &file_path.display().to_string())?;
 
     // Check for fatal diagnostics from initial parsing
    if event_iter.diagnostics.has_fatal() {
@@ -295,7 +275,11 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
     // Iterate through events
     while let Some(event) = event_iter.next() {
         match event {
-            Event::Observe { observation_id, timestamp, change_count } => {
+            Event::Observe {
+                observation_id,
+                timestamp,
+                change_count,
+            } => {
                 observations.push(ObservationInfo {
                     id: observation_id,
                     timestamp,
@@ -316,7 +300,9 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
                     }
                 }
             }
-            Event::Change { path, new_value, .. } => {
+            Event::Change {
+                path, new_value, ..
+            } => {
                 let _ = json_archive::apply_change(&mut current_state, &path, new_value);
 
                 // Update the JSON size of the last observation
@@ -368,10 +354,9 @@ fn collect_observations(file_path: &Path) -> Result<(Vec<ObservationInfo>, usize
         }
     }
 
-    Ok((observations, snapshot_count))
+    Ok((observations, snapshot_count, compression_format))
 }
 
-
 fn format_timestamp(dt: &DateTime<Utc>) -> String {
     dt.format("%a %H:%M:%S %d-%b-%Y").to_string()
 }
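For reference, the new read path composes like this; a minimal sketch assembled from the calls visible in the diff above (open_archive, read_events, and the Event stream), with the per-observation bookkeeping and diagnostics checks elided. The error types are assumed to propagate with ? exactly as in collect_observations:

    use json_archive::archive_open::open_archive;
    use json_archive::detection::CompressionFormat;
    use json_archive::{read_events, Diagnostic, Event};
    use std::path::Path;

    // Open once (detecting compression from the bytes), then stream events.
    fn count_observations(path: &Path) -> Result<(usize, CompressionFormat), Vec<Diagnostic>> {
        let opened = open_archive(path)?;
        let format = opened.format;
        let (_initial_state, mut events) =
            read_events(opened.reader, &path.display().to_string())?;

        let mut count = 0;
        while let Some(event) = events.next() {
            if let Event::Observe { .. } = event {
                count += 1;
            }
        }
        Ok((count, format))
    }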