refactor: increase parse error diagnostic coverage

Replace several of the generic serde_json parse error messages with
detailed descriptions of what went wrong.
This commit is contained in:
nobody 2025-09-30 09:14:54 -07:00
commit 07e604ac25
Signed by: GrocerPublishAgent
GPG key ID: 43B1C298CDDE181C
4 changed files with 836 additions and 561 deletions

570
src/event_deserialize.rs Normal file
View file

@ -0,0 +1,570 @@
// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025 Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//
//! Event deserialization with diagnostic collection.
//!
//! ## Why this exists
//!
//! The .json.archive format uses arrays for events because that's compact and easy to work
//! with in JavaScript: `["add", "/path", value, "obs-id"]`. The format is human-editable
//! since people might want to experiment with it or fix issues by hand.
//!
//! Two problems in Rust:
//!
//! 1. **Array-based format**: Serde derive expects named struct fields. Deserializing from
//! positional arrays into structs requires custom Visitor implementation.
//!
//! 2. **Detailed error messages**: Goal is Elm-style diagnostics that show exactly what went
//! wrong, what was expected, and how to fix it. Serde's Deserialize trait only allows
//! returning string errors. To generate detailed diagnostics (with codes, severity levels,
//! advice), we need to manually implement the Visitor and collect errors in a wrapper type
//! instead of failing immediately. The wrapper gives us access to which field is being
//! parsed so we can say "expected observation ID at position 3" instead of "parse error".
//!
//! ## Library search
//!
//! Spent 30 minutes looking for existing solutions. Checked:
//! - serde_path_to_error: Adds field path context but still returns string errors
//! - figment: Configuration library, but it sounded like it could be used only for diagnostics
//! - config/serde_value: Similar issue
//! - json5: Relaxed JSON syntax, not diagnostic-focused
//! - miette: a diagnostic library for Rust. It includes a series of
//! traits/protocols that allow you to hook into its error reporting facilities,
//! and even write your own error reports. This is better than my home built
//! Diagnostic struct, but does not help me with deserialization.
//!
//! Found no library that handles both array deserialization and rich diagnostic collection.
//! This could probably be automated or turned into a library, but for a simple format it was
//! faster to implement by hand. Also serves as exploration of what diagnostic-driven parsing
//! costs in terms of code.
//!
//! ## What this does
//!
//! EventDeserializer wraps Event and collects diagnostics during parsing. It implements
//! Deserialize with a custom Visitor that validates each array position and populates the
//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
//! location information (filename, line number) after deserialization.
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
use serde_json::Value;
use std::fmt;
use chrono::{DateTime, Utc};
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
use crate::events::Event;
/// Outcome of deserializing a single event line from a .json.archive file.
///
/// Instead of failing on the first malformed field, deserialization records
/// rich `Diagnostic`s and leaves `event` as `None` when parsing fails. The
/// caller (reader.rs) attaches filename/line-number location info afterwards.
#[derive(Debug, Default)]
pub struct EventDeserializer {
/// The successfully parsed event, or `None` if a fatal problem was found.
pub event: Option<Event>,
/// Diagnostics collected while parsing; empty on a clean parse.
pub diagnostics: Vec<Diagnostic>,
}
impl EventDeserializer {
pub fn new() -> Self {
Self::default()
}
fn add_diagnostic(&mut self, level: DiagnosticLevel, code: DiagnosticCode, message: String) {
self.diagnostics.push(Diagnostic::new(level, code, message));
}
}
impl<'de> Deserialize<'de> for EventDeserializer {
// Delegate to a custom sequence visitor: events are positional JSON arrays
// (e.g. ["add", "/path", value, "obs-id"]), so serde's derive machinery for
// named struct fields cannot be used here.
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_seq(EventVisitor::new())
}
}
/// Serde visitor that parses the positional event array and accumulates
/// diagnostics into the wrapped `EventDeserializer` instead of erroring.
struct EventVisitor {
deserializer: EventDeserializer,
}
impl EventVisitor {
fn new() -> Self {
Self {
deserializer: EventDeserializer::new(),
}
}
}
impl<'de> Visitor<'de> for EventVisitor {
type Value = EventDeserializer;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an array representing an event")
}
fn visit_seq<A>(mut self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: SeqAccess<'de>,
{
let mut elements: Vec<Value> = Vec::new();
while let Some(elem) = seq.next_element::<Value>()? {
elements.push(elem);
}
if elements.is_empty() {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
"I found an empty array, but events must have at least a string type field as first element.".to_string(),
);
return Ok(self.deserializer);
}
let event_type = match elements[0].as_str() {
Some(t) => t,
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the first element of an event to be a string event type.".to_string(),
);
return Ok(self.deserializer);
}
};
match event_type {
"observe" => {
if elements.len() != 4 {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected an observe event to have 4 fields, but found {}.", elements.len()),
);
return Ok(self.deserializer);
}
let id = match elements[1].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the observation ID to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let timestamp = match elements[2].as_str() {
Some(s) => match s.parse::<DateTime<Utc>>() {
Ok(dt) => dt,
Err(_) => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
);
return Ok(self.deserializer);
}
},
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the timestamp to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let change_count = match elements[3].as_u64() {
Some(n) => n as usize,
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the change count to be a non-negative integer.".to_string(),
);
return Ok(self.deserializer);
}
};
self.deserializer.event = Some(Event::Observe {
observation_id: id,
timestamp,
change_count,
});
}
"add" => {
if elements.len() != 4 {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected an add event to have 4 fields, but found {}.", elements.len()),
);
return Ok(self.deserializer);
}
let path = match elements[1].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the path to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let value = elements[2].clone();
let observation_id = match elements[3].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the observation ID to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
self.deserializer.event = Some(Event::Add {
path,
value,
observation_id,
});
}
"change" => {
if elements.len() != 4 {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected a change event to have 4 fields, but found {}.", elements.len()),
);
return Ok(self.deserializer);
}
let path = match elements[1].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the path to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let new_value = elements[2].clone();
let observation_id = match elements[3].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the observation ID to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
self.deserializer.event = Some(Event::Change {
path,
new_value,
observation_id,
});
}
"remove" => {
if elements.len() != 3 {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected a remove event to have 3 fields, but found {}.", elements.len()),
);
return Ok(self.deserializer);
}
let path = match elements[1].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the path to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let observation_id = match elements[2].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the observation ID to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
self.deserializer.event = Some(Event::Remove {
path,
observation_id,
});
}
"move" => {
if elements.len() != 4 {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected a move event to have 4 fields, but found {}.", elements.len()),
);
return Ok(self.deserializer);
}
let path = match elements[1].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the path to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let moves = match self.parse_moves(&elements[2]) {
Ok(moves) => moves,
Err(err_msg) => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
err_msg,
);
return Ok(self.deserializer);
}
};
let observation_id = match elements[3].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the observation ID to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
self.deserializer.event = Some(Event::Move {
path,
moves,
observation_id,
});
}
"snapshot" => {
if elements.len() != 4 {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected a snapshot event to have 4 fields, but found {}.", elements.len()),
);
return Ok(self.deserializer);
}
let observation_id = match elements[1].as_str() {
Some(s) => s.to_string(),
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the observation ID to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let timestamp = match elements[2].as_str() {
Some(s) => match s.parse::<DateTime<Utc>>() {
Ok(dt) => dt,
Err(_) => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
);
return Ok(self.deserializer);
}
},
None => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the timestamp to be a string.".to_string(),
);
return Ok(self.deserializer);
}
};
let object = elements[3].clone();
self.deserializer.event = Some(Event::Snapshot {
observation_id,
timestamp,
object,
});
}
_ => {
self.deserializer.add_diagnostic(
DiagnosticLevel::Warning,
DiagnosticCode::UnknownEventType,
format!("I found an unknown event type: '{}'", event_type),
);
}
}
Ok(self.deserializer)
}
}
impl EventVisitor {
fn parse_moves(&mut self, moves_value: &Value) -> Result<Vec<(usize, usize)>, String> {
let moves_array = match moves_value.as_array() {
Some(arr) => arr,
None => {
return Err("I expected the moves to be an array of [from, to] pairs.".to_string());
}
};
let mut moves = Vec::new();
for move_pair in moves_array {
let pair = match move_pair.as_array() {
Some(p) if p.len() == 2 => p,
_ => {
return Err("I expected each move to be a [from, to] pair.".to_string());
}
};
let from_idx = match pair[0].as_u64() {
Some(i) => i as usize,
None => {
return Err("I expected the 'from' index to be a non-negative integer.".to_string());
}
};
let to_idx = match pair[1].as_u64() {
Some(i) => i as usize,
None => {
return Err("I expected the 'to' index to be a non-negative integer.".to_string());
}
};
moves.push((from_idx, to_idx));
}
Ok(moves)
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// Happy path: a well-formed observe event parses with no diagnostics.
#[test]
fn test_deserialize_observe_event() {
let json = json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]);
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
assert!(result.is_ok());
let deserializer = result.unwrap();
assert!(deserializer.diagnostics.is_empty());
assert!(matches!(
deserializer.event,
Some(Event::Observe { observation_id, timestamp: _, change_count })
if observation_id == "obs-1" && change_count == 1
));
}
// Happy path: an add event carries its path, value, and observation ID through.
#[test]
fn test_deserialize_add_event() {
let json = json!(["add", "/count", 42, "obs-1"]);
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
assert!(result.is_ok());
let deserializer = result.unwrap();
assert!(deserializer.diagnostics.is_empty());
assert!(matches!(
deserializer.event,
Some(Event::Add { path, value, observation_id })
if path == "/count" && value == json!(42) && observation_id == "obs-1"
));
}
// Unknown event types produce a single UnknownEventType diagnostic and no
// event — deserialization itself still succeeds (Ok), by design.
#[test]
fn test_deserialize_invalid_event_type() {
let json = json!(["invalid", "some", "data"]);
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
assert!(result.is_ok());
let deserializer = result.unwrap();
assert_eq!(deserializer.diagnostics.len(), 1);
assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::UnknownEventType);
assert!(deserializer.event.is_none());
}
// Too few fields yields a fatal WrongFieldCount diagnostic and no event.
#[test]
fn test_deserialize_wrong_field_count() {
let json = json!(["observe", "obs-1"]);
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
assert!(result.is_ok());
let deserializer = result.unwrap();
assert_eq!(deserializer.diagnostics.len(), 1);
assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::WrongFieldCount);
assert!(deserializer.event.is_none());
}
// Happy path: move events parse their [from, to] pairs into (usize, usize).
#[test]
fn test_deserialize_move_event() {
let json = json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]);
let result: Result<EventDeserializer, _> = serde_json::from_value(json);
assert!(result.is_ok());
let deserializer = result.unwrap();
assert!(deserializer.diagnostics.is_empty());
assert!(matches!(
deserializer.event,
Some(Event::Move { path, moves, observation_id })
if path == "/items" && moves == vec![(0, 2), (1, 0)] && observation_id == "obs-1"
));
}
}

View file

@ -23,6 +23,7 @@ pub mod archive;
pub mod detection;
pub mod diagnostics;
pub mod diff;
pub mod event_deserialize;
pub mod events;
pub mod flags;
pub mod pointer;

View file

@ -26,7 +26,8 @@ use std::io::{BufRead, BufReader};
use std::path::Path;
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticCollector, DiagnosticLevel};
use crate::events::Header;
use crate::event_deserialize::EventDeserializer;
use crate::events::{Event, Header};
use crate::pointer::JsonPointer;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -165,8 +166,8 @@ impl ArchiveReader {
continue;
}
let event = match serde_json::from_str::<Value>(&line) {
Ok(v) => v,
let event_deserializer = match serde_json::from_str::<EventDeserializer>(&line) {
Ok(d) => d,
Err(e) => {
diagnostics.add(
Diagnostic::new(
@ -188,351 +189,188 @@ impl ArchiveReader {
}
};
if let Some(arr) = event.as_array() {
if arr.is_empty() {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
"I found an empty array, but events must have at least a type field."
.to_string(),
)
// Add any diagnostics from deserialization with location info
for diagnostic in event_deserializer.diagnostics {
diagnostics.add(
diagnostic
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line)),
);
continue;
.with_snippet(format!("{} | {}", line_number, line))
);
}
// Continue processing to collect additional errors before failing.
// Even though this function must now return an error, we continue to help
// the user identify all issues in the file at once rather than one at a time.
let event = match event_deserializer.event {
Some(e) => e,
None => {
assert!(diagnostics.has_fatal(), "Expected a fatal diagnostic when deserialization fails");
continue
},
};
match event {
Event::Observe { observation_id, timestamp: _, change_count } => {
if let Some((_obs_id, obs_line, expected_count)) = &current_observation {
if events_in_observation != *expected_count {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Warning,
DiagnosticCode::ChangeCountMismatch,
format!(
"The observe event at line {} declared {} changes, but I found {}.",
obs_line, expected_count, events_in_observation
)
)
.with_location(self.filename.clone(), *obs_line)
.with_advice(
"Make sure the change_count in the observe event matches the number of \
add/change/remove/move events that follow it."
.to_string()
)
);
}
}
if seen_observations.contains(&observation_id) {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Warning,
DiagnosticCode::DuplicateObservationId,
format!("I found a duplicate observation ID: '{}'", observation_id),
)
.with_location(self.filename.clone(), line_number)
.with_advice(
"Each observation ID should be unique within the archive. \
Consider using UUIDs or timestamps to ensure uniqueness."
.to_string(),
),
);
}
seen_observations.insert(observation_id.clone());
current_observation = Some((observation_id, line_number, change_count));
events_in_observation = 0;
observation_count += 1;
}
let event_type = match arr[0].as_str() {
Some(t) => t,
None => {
Event::Add { path, value, observation_id } => {
events_in_observation += 1;
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(&observation_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the first element of an event to be a string event type.".to_string()
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line))
.with_advice(
"Events must look like [eventType, ...]. The eventType must be one of:\n\
observe, add, change, remove, move, snapshot."
"Each add/change/remove/move event must reference an observation ID from a preceding observe event."
.to_string()
)
);
continue;
}
};
match event_type {
"observe" => {
if let Some((_obs_id, obs_line, expected_count)) = &current_observation {
if events_in_observation != *expected_count {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Warning,
DiagnosticCode::ChangeCountMismatch,
format!(
"The observe event at line {} declared {} changes, but I found {}.",
obs_line, expected_count, events_in_observation
)
)
.with_location(self.filename.clone(), *obs_line)
.with_advice(
"Make sure the change_count in the observe event matches the number of \
add/change/remove/move events that follow it."
.to_string()
)
);
}
}
if arr.len() != 4 {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected an observe event to have 4 fields, but found {}.", arr.len())
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line))
.with_advice(
"Observe events must be: [\"observe\", observationId, timestamp, changeCount]"
.to_string()
)
);
continue;
}
let obs_id = arr[1].as_str().unwrap_or("").to_string();
let change_count = arr[3].as_u64().unwrap_or(0) as usize;
if seen_observations.contains(&obs_id) {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Warning,
DiagnosticCode::DuplicateObservationId,
format!("I found a duplicate observation ID: '{}'", obs_id),
)
.with_location(self.filename.clone(), line_number)
.with_advice(
"Each observation ID should be unique within the archive. \
Consider using UUIDs or timestamps to ensure uniqueness."
.to_string(),
),
);
}
seen_observations.insert(obs_id.clone());
current_observation = Some((obs_id, line_number, change_count));
events_in_observation = 0;
observation_count += 1;
if let Err(diag) = apply_add(&mut state, &path, value) {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
continue;
}
}
"add" => {
events_in_observation += 1;
if arr.len() != 4 {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!(
"I expected an add event to have 4 fields, but found {}.",
arr.len()
),
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line)),
);
continue;
}
Event::Change { path, new_value, observation_id } => {
events_in_observation += 1;
let path = arr[1].as_str().unwrap_or("");
let value = arr[2].clone();
let obs_id = arr[3].as_str().unwrap_or("");
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(obs_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", obs_id)
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line))
.with_advice(
"Each add/change/remove/move event must reference an observation ID from a preceding observe event."
.to_string()
)
);
continue;
}
if let Err(_) =
self.apply_add(&mut state, path, value, line_number, &mut diagnostics)
{
continue;
}
}
"change" => {
events_in_observation += 1;
if arr.len() != 4 {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!(
"I expected a change event to have 4 fields, but found {}.",
arr.len()
),
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line)),
);
continue;
}
let path = arr[1].as_str().unwrap_or("");
let new_value = arr[2].clone();
let obs_id = arr[3].as_str().unwrap_or("");
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(obs_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", obs_id)
)
.with_location(self.filename.clone(), line_number)
);
continue;
}
if let Err(_) = self.apply_change(
&mut state,
path,
new_value,
line_number,
&mut diagnostics,
) {
continue;
}
}
"remove" => {
events_in_observation += 1;
if arr.len() != 3 {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!(
"I expected a remove event to have 3 fields, but found {}.",
arr.len()
),
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line)),
);
continue;
}
let path = arr[1].as_str().unwrap_or("");
let obs_id = arr[2].as_str().unwrap_or("");
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(obs_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", obs_id)
)
.with_location(self.filename.clone(), line_number)
);
continue;
}
if let Err(_) =
self.apply_remove(&mut state, path, line_number, &mut diagnostics)
{
continue;
}
}
"move" => {
events_in_observation += 1;
if arr.len() != 4 {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!(
"I expected a move event to have 4 fields, but found {}.",
arr.len()
),
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line)),
);
continue;
}
let path = arr[1].as_str().unwrap_or("");
let moves = arr[2].clone();
let obs_id = arr[3].as_str().unwrap_or("");
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(obs_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", obs_id)
)
.with_location(self.filename.clone(), line_number)
);
continue;
}
if let Err(_) =
self.apply_move(&mut state, path, moves, line_number, &mut diagnostics)
{
continue;
}
}
"snapshot" => {
if arr.len() != 4 {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldCount,
format!("I expected a snapshot event to have 4 fields, but found {}.", arr.len())
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line))
);
continue;
}
let snapshot_state = arr[3].clone();
if self.mode == ReadMode::FullValidation && state != snapshot_state {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Warning,
DiagnosticCode::SnapshotStateMismatch,
"I found a snapshot whose state doesn't match the replayed state up to this point.".to_string()
)
.with_location(self.filename.clone(), line_number)
.with_advice(
"This could indicate corruption or that events were applied incorrectly. \
The snapshot state should exactly match the result of replaying all events \
from the initial state."
.to_string()
)
);
}
state = snapshot_state;
}
_ => {
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(&observation_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Warning,
DiagnosticCode::UnknownEventType,
format!("I found an unknown event type: '{}'", event_type)
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
)
.with_location(self.filename.clone(), line_number)
);
continue;
}
if let Err(diag) = apply_change(&mut state, &path, new_value) {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
continue;
}
}
Event::Remove { path, observation_id } => {
events_in_observation += 1;
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(&observation_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
)
.with_location(self.filename.clone(), line_number)
);
continue;
}
if let Err(diag) = apply_remove(&mut state, &path) {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
continue;
}
}
Event::Move { path, moves, observation_id } => {
events_in_observation += 1;
if self.mode == ReadMode::FullValidation
&& !seen_observations.contains(&observation_id)
{
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::NonExistentObservationId,
format!("I found a reference to observation '{}', but I haven't seen an observe event with that ID yet.", observation_id)
)
.with_location(self.filename.clone(), line_number)
);
continue;
}
if let Err(diag) = apply_move(&mut state, &path, moves) {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
continue;
}
}
Event::Snapshot { observation_id: _, timestamp: _, object } => {
if self.mode == ReadMode::FullValidation && state != object {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::SnapshotStateMismatch,
"I found a snapshot whose state doesn't match the replayed state up to this point.".to_string()
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line))
.with_advice(
"Valid event types are: observe, add, change, remove, move, snapshot. \
This line will be skipped."
"This could indicate corruption or that events were applied incorrectly. \
The snapshot state should exactly match the result of replaying all events \
from the initial state."
.to_string()
)
);
}
state = object;
}
} else {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected an event to be a JSON array, but found a different type."
.to_string(),
)
.with_location(self.filename.clone(), line_number)
.with_snippet(format!("{} | {}", line_number, line)),
);
}
}
@ -630,254 +468,107 @@ impl ArchiveReader {
}
}
fn apply_add(
&self,
state: &mut Value,
path: &str,
value: Value,
line_number: usize,
diagnostics: &mut DiagnosticCollector,
) -> Result<(), ()> {
let pointer = match JsonPointer::new(path) {
Ok(p) => p,
Err(diag) => {
diagnostics.add(
diag.with_location(self.filename.clone(), line_number)
.with_advice(
"JSON Pointer paths must start with '/' and use '/' to separate segments.\n\
Special characters: use ~0 for ~ and ~1 for /"
.to_string()
)
);
return Err(());
}
};
}
if let Err(diag) = pointer.set(state, value) {
diagnostics.add(
diag.with_location(self.filename.clone(), line_number)
.with_advice(
"For add operations, the parent path must exist. \
For example, to add /a/b/c, the paths /a and /a/b must already exist."
.to_string(),
fn apply_add(state: &mut Value, path: &str, value: Value) -> Result<(), Diagnostic> {
let pointer = JsonPointer::new(path).map_err(|diag| {
diag.with_advice(
"JSON Pointer paths must start with '/' and use '/' to separate segments.\n\
Special characters: use ~0 for ~ and ~1 for /"
.to_string()
)
})?;
pointer.set(state, value).map_err(|diag| {
diag.with_advice(
"For add operations, the parent path must exist. \
For example, to add /a/b/c, the paths /a and /a/b must already exist."
.to_string()
)
})
}
fn apply_change(state: &mut Value, path: &str, new_value: Value) -> Result<(), Diagnostic> {
let pointer = JsonPointer::new(path)?;
pointer.set(state, new_value)?;
Ok(())
}
fn apply_remove(state: &mut Value, path: &str) -> Result<(), Diagnostic> {
let pointer = JsonPointer::new(path)?;
pointer.remove(state)?;
Ok(())
}
fn apply_move(
state: &mut Value,
path: &str,
moves: Vec<(usize, usize)>,
) -> Result<(), Diagnostic> {
let pointer = JsonPointer::new(path)?;
let array = pointer.get(state)?;
if !array.is_array() {
return Err(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MoveOnNonArray,
format!(
"I can't apply move operations to '{}' because it's not an array.",
path
),
)
.with_advice(
"Move operations can only reorder elements within an array. \
The path must point to an array value."
.to_string(),
),
);
}
let mut arr = array.as_array().unwrap().clone();
for (from_idx, to_idx) in moves {
if from_idx >= arr.len() {
return Err(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MoveIndexOutOfBounds,
format!(
"The 'from' index {} is out of bounds (array length is {}).",
from_idx,
arr.len()
),
)
);
return Err(());
}
Ok(())
}
fn apply_change(
&self,
state: &mut Value,
path: &str,
new_value: Value,
line_number: usize,
diagnostics: &mut DiagnosticCollector,
) -> Result<(), ()> {
let pointer = match JsonPointer::new(path) {
Ok(p) => p,
Err(diag) => {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
return Err(());
}
};
if let Err(diag) = pointer.set(state, new_value) {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
return Err(());
if to_idx > arr.len() {
return Err(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MoveIndexOutOfBounds,
format!(
"The 'to' index {} is out of bounds (array length is {}).",
to_idx,
arr.len()
),
)
);
}
Ok(())
let element = arr[from_idx].clone();
arr.insert(to_idx, element);
let remove_idx = if from_idx > to_idx {
from_idx + 1
} else {
from_idx
};
arr.remove(remove_idx);
}
fn apply_remove(
&self,
state: &mut Value,
path: &str,
line_number: usize,
diagnostics: &mut DiagnosticCollector,
) -> Result<(), ()> {
let pointer = match JsonPointer::new(path) {
Ok(p) => p,
Err(diag) => {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
return Err(());
}
};
if let Err(mut diag) = pointer.remove(state) {
if self.mode == ReadMode::FullValidation {
diag.level = DiagnosticLevel::Fatal;
} else {
diag.level = DiagnosticLevel::Warning;
}
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
if self.mode == ReadMode::FullValidation {
return Err(());
}
}
Ok(())
}
fn apply_move(
&self,
state: &mut Value,
path: &str,
moves_value: Value,
line_number: usize,
diagnostics: &mut DiagnosticCollector,
) -> Result<(), ()> {
let pointer = match JsonPointer::new(path) {
Ok(p) => p,
Err(diag) => {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
return Err(());
}
};
let array = match pointer.get(state) {
Ok(v) => {
if !v.is_array() {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MoveOnNonArray,
format!(
"I can't apply move operations to '{}' because it's not an array.",
path
),
)
.with_location(self.filename.clone(), line_number)
.with_advice(
"Move operations can only reorder elements within an array. \
The path must point to an array value."
.to_string(),
),
);
return Err(());
}
v.clone()
}
Err(diag) => {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
return Err(());
}
};
let mut arr = array.as_array().unwrap().clone();
let moves = match moves_value.as_array() {
Some(m) => m,
None => {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected the moves to be an array of [from, to] pairs.".to_string(),
)
.with_location(self.filename.clone(), line_number),
);
return Err(());
}
};
for move_pair in moves {
let pair = match move_pair.as_array() {
Some(p) if p.len() == 2 => p,
_ => {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::WrongFieldType,
"I expected each move to be a [from, to] pair.".to_string(),
)
.with_location(self.filename.clone(), line_number),
);
return Err(());
}
};
let from_idx = match pair[0].as_u64() {
Some(i) => i as usize,
None => {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::InvalidMoveIndex,
"I expected the 'from' index to be a non-negative integer.".to_string(),
)
.with_location(self.filename.clone(), line_number),
);
return Err(());
}
};
let to_idx = match pair[1].as_u64() {
Some(i) => i as usize,
None => {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::InvalidMoveIndex,
"I expected the 'to' index to be a non-negative integer.".to_string(),
)
.with_location(self.filename.clone(), line_number),
);
return Err(());
}
};
if from_idx >= arr.len() {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MoveIndexOutOfBounds,
format!(
"The 'from' index {} is out of bounds (array length is {}).",
from_idx,
arr.len()
),
)
.with_location(self.filename.clone(), line_number),
);
return Err(());
}
if to_idx > arr.len() {
diagnostics.add(
Diagnostic::new(
DiagnosticLevel::Fatal,
DiagnosticCode::MoveIndexOutOfBounds,
format!(
"The 'to' index {} is out of bounds (array length is {}).",
to_idx,
arr.len()
),
)
.with_location(self.filename.clone(), line_number),
);
return Err(());
}
let element = arr[from_idx].clone();
arr.insert(to_idx, element);
let remove_idx = if from_idx > to_idx {
from_idx + 1
} else {
from_idx
};
arr.remove(remove_idx);
}
pointer.set(state, Value::Array(arr)).map_err(|diag| {
diagnostics.add(diag.with_location(self.filename.clone(), line_number));
})
}
pointer.set(state, Value::Array(arr))
}
#[cfg(test)]