refactor: increase parse error diagnostic coverage
Replace several of the generic serde_json parse error messages with detailed descriptions of what went wrong.
This commit is contained in:
parent
b370f462f2
commit
07e604ac25
4 changed files with 836 additions and 561 deletions
570
src/event_deserialize.rs
Normal file
570
src/event_deserialize.rs
Normal file
|
|
@ -0,0 +1,570 @@
|
|||
// json-archive is a tool for tracking JSON file changes over time
|
||||
// Copyright (C) 2025 Peoples Grocers LLC
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published
|
||||
// by the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
//
|
||||
// To purchase a license under different terms contact admin@peoplesgrocers.com
|
||||
// To request changes, report bugs, or give user feedback contact
|
||||
// marxism@peoplesgrocers.com
|
||||
//
|
||||
|
||||
//! Event deserialization with diagnostic collection.
|
||||
//!
|
||||
//! ## Why this exists
|
||||
//!
|
||||
//! The .json.archive format uses arrays for events because that's compact and easy to work
|
||||
//! with in JavaScript: `["add", "/path", value, "obs-id"]`. The format is human-editable
|
||||
//! since people might want to experiment with it or fix issues by hand.
|
||||
//!
|
||||
//! Two problems in Rust:
|
||||
//!
|
||||
//! 1. **Array-based format**: Serde derive expects named struct fields. Deserializing from
|
||||
//! positional arrays into structs requires custom Visitor implementation.
|
||||
//!
|
||||
//! 2. **Detailed error messages**: Goal is Elm-style diagnostics that show exactly what went
|
||||
//! wrong, what was expected, and how to fix it. Serde's Deserialize trait only allows
|
||||
//! returning string errors. To generate detailed diagnostics (with codes, severity levels,
|
||||
//! advice), we need to manually implement the Visitor and collect errors in a wrapper type
|
||||
//! instead of failing immediately. The wrapper gives us access to which field is being
|
||||
//! parsed so we can say "expected observation ID at position 3" instead of "parse error".
|
||||
//!
|
||||
//! ## Library search
|
||||
//!
|
||||
//! Spent 30 minutes looking for existing solutions. Checked:
|
||||
//! - serde_path_to_error: Adds field path context but still returns string errors
|
||||
//! - figment: Configuration library, but it sounded like it could be used only for diagnostics
|
||||
//! - config/serde_value: Similar issue
|
||||
//! - json5: Relaxed JSON syntax, not diagnostic-focused
|
||||
//! - miette: a diagnostic library for Rust. It includes a series of
|
||||
//! traits/protocols that allow you to hook into its error reporting facilities,
|
||||
//! and even write your own error reports. This is better than my home built
|
||||
//! Diagnostic struct, but does not help me with deserialization.
|
||||
//!
|
||||
//! Found no library that handles both array deserialization and rich diagnostic collection.
|
||||
//! This could probably be automated or turned into a library, but for a simple format it was
|
||||
//! faster to implement by hand. Also serves as exploration of what diagnostic-driven parsing
|
||||
//! costs in terms of code.
|
||||
//!
|
||||
//! ## What this does
|
||||
//!
|
||||
//! EventDeserializer wraps Event and collects diagnostics during parsing. It implements
|
||||
//! Deserialize with a custom Visitor that validates each array position and populates the
|
||||
//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
|
||||
//! location information (filename, line number) after deserialization.
|
||||
|
||||
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};
|
||||
use serde_json::Value;
|
||||
use std::fmt;
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel};
|
||||
use crate::events::Event;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct EventDeserializer {
|
||||
pub event: Option<Event>,
|
||||
pub diagnostics: Vec<Diagnostic>,
|
||||
}
|
||||
|
||||
impl EventDeserializer {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
fn add_diagnostic(&mut self, level: DiagnosticLevel, code: DiagnosticCode, message: String) {
|
||||
self.diagnostics.push(Diagnostic::new(level, code, message));
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for EventDeserializer {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_seq(EventVisitor::new())
|
||||
}
|
||||
}
|
||||
|
||||
struct EventVisitor {
|
||||
deserializer: EventDeserializer,
|
||||
}
|
||||
|
||||
impl EventVisitor {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
deserializer: EventDeserializer::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Visitor<'de> for EventVisitor {
|
||||
type Value = EventDeserializer;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("an array representing an event")
|
||||
}
|
||||
|
||||
fn visit_seq<A>(mut self, mut seq: A) -> Result<Self::Value, A::Error>
|
||||
where
|
||||
A: SeqAccess<'de>,
|
||||
{
|
||||
let mut elements: Vec<Value> = Vec::new();
|
||||
|
||||
while let Some(elem) = seq.next_element::<Value>()? {
|
||||
elements.push(elem);
|
||||
}
|
||||
|
||||
if elements.is_empty() {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
"I found an empty array, but events must have at least a string type field as first element.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let event_type = match elements[0].as_str() {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the first element of an event to be a string event type.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
match event_type {
|
||||
"observe" => {
|
||||
if elements.len() != 4 {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected an observe event to have 4 fields, but found {}.", elements.len()),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let id = match elements[1].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the observation ID to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp = match elements[2].as_str() {
|
||||
Some(s) => match s.parse::<DateTime<Utc>>() {
|
||||
Ok(dt) => dt,
|
||||
Err(_) => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
},
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let change_count = match elements[3].as_u64() {
|
||||
Some(n) => n as usize,
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the change count to be a non-negative integer.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
self.deserializer.event = Some(Event::Observe {
|
||||
observation_id: id,
|
||||
timestamp,
|
||||
change_count,
|
||||
});
|
||||
}
|
||||
|
||||
"add" => {
|
||||
if elements.len() != 4 {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected an add event to have 4 fields, but found {}.", elements.len()),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let path = match elements[1].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the path to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let value = elements[2].clone();
|
||||
|
||||
let observation_id = match elements[3].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the observation ID to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
self.deserializer.event = Some(Event::Add {
|
||||
path,
|
||||
value,
|
||||
observation_id,
|
||||
});
|
||||
}
|
||||
|
||||
"change" => {
|
||||
if elements.len() != 4 {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a change event to have 4 fields, but found {}.", elements.len()),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let path = match elements[1].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the path to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let new_value = elements[2].clone();
|
||||
|
||||
let observation_id = match elements[3].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the observation ID to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
self.deserializer.event = Some(Event::Change {
|
||||
path,
|
||||
new_value,
|
||||
observation_id,
|
||||
});
|
||||
}
|
||||
|
||||
"remove" => {
|
||||
if elements.len() != 3 {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a remove event to have 3 fields, but found {}.", elements.len()),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let path = match elements[1].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the path to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let observation_id = match elements[2].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the observation ID to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
self.deserializer.event = Some(Event::Remove {
|
||||
path,
|
||||
observation_id,
|
||||
});
|
||||
}
|
||||
|
||||
"move" => {
|
||||
if elements.len() != 4 {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a move event to have 4 fields, but found {}.", elements.len()),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let path = match elements[1].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the path to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let moves = match self.parse_moves(&elements[2]) {
|
||||
Ok(moves) => moves,
|
||||
Err(err_msg) => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
err_msg,
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let observation_id = match elements[3].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the observation ID to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
self.deserializer.event = Some(Event::Move {
|
||||
path,
|
||||
moves,
|
||||
observation_id,
|
||||
});
|
||||
}
|
||||
|
||||
"snapshot" => {
|
||||
if elements.len() != 4 {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldCount,
|
||||
format!("I expected a snapshot event to have 4 fields, but found {}.", elements.len()),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
|
||||
let observation_id = match elements[1].as_str() {
|
||||
Some(s) => s.to_string(),
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the observation ID to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let timestamp = match elements[2].as_str() {
|
||||
Some(s) => match s.parse::<DateTime<Utc>>() {
|
||||
Ok(dt) => dt,
|
||||
Err(_) => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
},
|
||||
None => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Fatal,
|
||||
DiagnosticCode::WrongFieldType,
|
||||
"I expected the timestamp to be a string.".to_string(),
|
||||
);
|
||||
return Ok(self.deserializer);
|
||||
}
|
||||
};
|
||||
|
||||
let object = elements[3].clone();
|
||||
|
||||
self.deserializer.event = Some(Event::Snapshot {
|
||||
observation_id,
|
||||
timestamp,
|
||||
object,
|
||||
});
|
||||
}
|
||||
|
||||
_ => {
|
||||
self.deserializer.add_diagnostic(
|
||||
DiagnosticLevel::Warning,
|
||||
DiagnosticCode::UnknownEventType,
|
||||
format!("I found an unknown event type: '{}'", event_type),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self.deserializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl EventVisitor {
|
||||
fn parse_moves(&mut self, moves_value: &Value) -> Result<Vec<(usize, usize)>, String> {
|
||||
let moves_array = match moves_value.as_array() {
|
||||
Some(arr) => arr,
|
||||
None => {
|
||||
return Err("I expected the moves to be an array of [from, to] pairs.".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let mut moves = Vec::new();
|
||||
for move_pair in moves_array {
|
||||
let pair = match move_pair.as_array() {
|
||||
Some(p) if p.len() == 2 => p,
|
||||
_ => {
|
||||
return Err("I expected each move to be a [from, to] pair.".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let from_idx = match pair[0].as_u64() {
|
||||
Some(i) => i as usize,
|
||||
None => {
|
||||
return Err("I expected the 'from' index to be a non-negative integer.".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
let to_idx = match pair[1].as_u64() {
|
||||
Some(i) => i as usize,
|
||||
None => {
|
||||
return Err("I expected the 'to' index to be a non-negative integer.".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
moves.push((from_idx, to_idx));
|
||||
}
|
||||
|
||||
Ok(moves)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs the deserializer over `input`, panicking if serde itself fails.
    fn parse(input: Value) -> EventDeserializer {
        let outcome: Result<EventDeserializer, _> = serde_json::from_value(input);
        assert!(outcome.is_ok());
        outcome.unwrap()
    }

    #[test]
    fn test_deserialize_observe_event() {
        let parsed = parse(json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]));

        assert!(parsed.diagnostics.is_empty());
        match parsed.event {
            Some(Event::Observe {
                observation_id,
                timestamp: _,
                change_count,
            }) => {
                assert_eq!(observation_id, "obs-1");
                assert_eq!(change_count, 1);
            }
            other => panic!("expected an observe event, got {:?}", other),
        }
    }

    #[test]
    fn test_deserialize_add_event() {
        let parsed = parse(json!(["add", "/count", 42, "obs-1"]));

        assert!(parsed.diagnostics.is_empty());
        match parsed.event {
            Some(Event::Add {
                path,
                value,
                observation_id,
            }) => {
                assert_eq!(path, "/count");
                assert_eq!(value, json!(42));
                assert_eq!(observation_id, "obs-1");
            }
            other => panic!("expected an add event, got {:?}", other),
        }
    }

    #[test]
    fn test_deserialize_invalid_event_type() {
        let parsed = parse(json!(["invalid", "some", "data"]));

        assert_eq!(parsed.diagnostics.len(), 1);
        assert_eq!(parsed.diagnostics[0].code, DiagnosticCode::UnknownEventType);
        assert!(parsed.event.is_none());
    }

    #[test]
    fn test_deserialize_wrong_field_count() {
        let parsed = parse(json!(["observe", "obs-1"]));

        assert_eq!(parsed.diagnostics.len(), 1);
        assert_eq!(parsed.diagnostics[0].code, DiagnosticCode::WrongFieldCount);
        assert!(parsed.event.is_none());
    }

    #[test]
    fn test_deserialize_move_event() {
        let parsed = parse(json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]));

        assert!(parsed.diagnostics.is_empty());
        match parsed.event {
            Some(Event::Move {
                path,
                moves,
                observation_id,
            }) => {
                assert_eq!(path, "/items");
                assert_eq!(moves, vec![(0, 2), (1, 0)]);
                assert_eq!(observation_id, "obs-1");
            }
            other => panic!("expected a move event, got {:?}", other),
        }
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue