refactor: decompose archive read/write into composable building blocks
Delete archive_context.rs and archive_ops.rs (1200+ lines of duplicated logic). Replace them with four focused modules:
1. open_archive() - opens a file, detects compression, returns raw bytes
2. read_archive() - parses bytes into validated observations
3. CompressionWriter - writes bytes with any compression format
4. WriteStrategy - given a list of files, determines the input archive, output archive, output format, and which of four write modes to use:
   - Create: new archive, no input
   - Append: uncompressed input, seek to end
   - AtomicSwap: compressed input, rewrite via temp file
   - CopyOnWrite: different input/output paths, transcode between formats
Previously you could not specify the output format: appending always preserved the input format, and creating compressed archives didn't work. Now all four cases work with any supported compression format. Atomic swap now writes to a temp file and then renames it, which makes it crash-safe.
Trade-off: This approach prioritizes code clarity over syscall efficiency. The archive file may be opened and read multiple times during a single operation (once for format detection, once for reading state, once for copying content). A more optimized implementation could reuse file handles, but the current approach makes each step's purpose obvious.
This commit is contained in:
parent
a760114ffe
commit
b65103c9f7
28 changed files with 2055 additions and 2859 deletions
|
|
@ -43,7 +43,7 @@
|
|||
//!
|
||||
//! Spent 30 minutes looking for existing solutions. Checked:
|
||||
//! - serde_path_to_error: Adds field path context but still returns string errors
|
||||
//! - figment: Configuration library, but sounded like could be used only for diagnostics
|
||||
//! - figment: Configuration library, but sounded like could be used only for diagnostics
|
||||
//! - config/serde_value: Similar issue
|
||||
//! - json5: Relaxed JSON syntax, not diagnostic-focused
|
||||
//! - miette: a diagnostic library for Rust. It includes a series of
|
||||
|
|
@ -63,10 +63,10 @@
|
|||
//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
|
||||
//! location information (filename, line number) after deserialization.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
|
||||
use serde_json::Value;
|
||||
use std::fmt;
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use crate::events::Event;
|
||||
|
|
@ -120,7 +120,7 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
A: SeqAccess<'de>,
|
||||
{
|
||||
let mut elements: Vec<Value> = Vec::new();
|
||||
|
||||
|
||||
while let Some(elem) = seq.next_element::<Value>()? {
|
||||
elements.push(elem);
|
||||
}
|
||||
|
|
@ -140,7 +140,8 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the first element of an event to be a string event type.".to_string(),
|
||||
"I expected the first element of an event to be a string event type."
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -152,7 +153,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected an observe event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected an observe event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -176,7 +180,8 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string."
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -215,7 +220,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected an add event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected an add event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -258,7 +266,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a change event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a change event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -301,7 +312,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a remove event to have 3 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a remove event to have 3 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -341,7 +355,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a move event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a move event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -394,7 +411,10 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a snapshot event to have 4 fields, but found {}.", elements.len()),
|
||||
format!(
|
||||
"I expected a snapshot event to have 4 fields, but found {}.",
|
||||
elements.len()
|
||||
),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -418,7 +438,8 @@ impl<'de> Visitor<'de> for EventVisitor {
|
|||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string."
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
|
@ -476,14 +497,18 @@ impl EventVisitor {
|
|||
let from_idx = match pair[0].as_u64() {
|
||||
Some(i) => i as usize,
|
||||
None => {
|
||||
return Err("I expected the 'from' index to be a non-negative integer.".to_string());
|
||||
return Err(
|
||||
"I expected the 'from' index to be a non-negative integer.".to_string()
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let to_idx = match pair[1].as_u64() {
|
||||
Some(i) => i as usize,
|
||||
None => {
|
||||
return Err("I expected the 'to' index to be a non-negative integer.".to_string());
|
||||
return Err(
|
||||
"I expected the 'to' index to be a non-negative integer.".to_string()
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -503,7 +528,7 @@ mod tests {
|
|||
fn test_deserialize_observe_event() {
|
||||
let json = json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert!(deserializer.diagnostics.is_empty());
|
||||
|
|
@ -518,7 +543,7 @@ mod tests {
|
|||
fn test_deserialize_add_event() {
|
||||
let json = json!(["add", "/count", 42, "obs-1"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert!(deserializer.diagnostics.is_empty());
|
||||
|
|
@ -533,11 +558,14 @@ mod tests {
|
|||
fn test_deserialize_invalid_event_type() {
|
||||
let json = json!(["invalid", "some", "data"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert_eq!(deserializer.diagnostics.len(), 1);
|
||||
assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::UnknownEventType);
|
||||
assert_eq!(
|
||||
deserializer.diagnostics[0].code,
|
||||
DiagnosticCode::UnknownEventType
|
||||
);
|
||||
assert!(deserializer.event.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -545,11 +573,14 @@ mod tests {
|
|||
fn test_deserialize_wrong_field_count() {
|
||||
let json = json!(["observe", "obs-1"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert_eq!(deserializer.diagnostics.len(), 1);
|
||||
assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::WrongFieldCount);
|
||||
assert_eq!(
|
||||
deserializer.diagnostics[0].code,
|
||||
DiagnosticCode::WrongFieldCount
|
||||
);
|
||||
assert!(deserializer.event.is_none());
|
||||
}
|
||||
|
||||
|
|
@ -557,7 +588,7 @@ mod tests {
|
|||
fn test_deserialize_move_event() {
|
||||
let json = json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]);
|
||||
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
|
||||
|
||||
|
||||
assert!(result.is_ok());
|
||||
let deserializer = result.unwrap();
|
||||
assert!(deserializer.diagnostics.is_empty());
|
||||
|
|
@ -567,4 +598,4 @@ mod tests {
|
|||
if path == "/items" && moves == vec![(0, 2), (1, 0)] && observation_id == "obs-1"
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue