// json-archive is a tool for tracking JSON file changes over time
// Copyright (C) 2025  Peoples Grocers LLC
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.
//
// To purchase a license under different terms contact admin@peoplesgrocers.com
// To request changes, report bugs, or give user feedback contact
// marxism@peoplesgrocers.com
//

//! Event deserialization with diagnostic collection.
//!
//! ## Why this exists
//!
//! The .json.archive format uses arrays for events because that's compact and easy to work
//! with in JavaScript: `["add", "/path", value, "obs-id"]`. The format is human-editable
//! since people might want to experiment with it or fix issues by hand.
//!
//! Two problems in Rust:
//!
//! 1. **Array-based format**: Serde derive expects named struct fields. Deserializing from
//!    positional arrays into structs requires a custom Visitor implementation.
//!
//! 2. **Detailed error messages**: Goal is Elm-style diagnostics that show exactly what went
//!    wrong, what was expected, and how to fix it. Serde's Deserialize trait only allows
//!    returning string errors. To generate detailed diagnostics (with codes, severity levels,
//!    advice), we need to manually implement the Visitor and collect errors in a wrapper type
//!    instead of failing immediately. The wrapper gives us access to which field is being
//!    parsed so we can say "expected observation ID at position 3" instead of "parse error".
//!
//! ## Library search
//!
//! Spent 30 minutes looking for existing solutions. Checked:
//! - serde_path_to_error: Adds field path context but still returns string errors
//! - figment: Configuration library, but sounded like it could be used only for diagnostics
//! - config/serde_value: Similar issue
//! - json5: Relaxed JSON syntax, not diagnostic-focused
//! - miette: a diagnostic library for Rust. It includes a series of
//!   traits/protocols that allow you to hook into its error reporting facilities,
//!   and even write your own error reports. This is better than my home-built
//!   Diagnostic struct, but does not help me with deserialization.
//!
//! Found no library that handles both array deserialization and rich diagnostic collection.
//! This could probably be automated or turned into a library, but for a simple format it was
//! faster to implement by hand. Also serves as exploration of what diagnostic-driven parsing
//! costs in terms of code.
//!
//! ## What this does
//!
//! EventDeserializer wraps Event and collects diagnostics during parsing. It implements
//! Deserialize with a custom Visitor that validates each array position and populates the
//! diagnostics vec instead of returning errors. The calling code (reader.rs) attaches
//! location information (filename, line number) after deserialization.

use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor}; use serde_json::Value; use std::fmt; use chrono::{DateTime, Utc}; use crate::diagnostics::{Diagnostic, DiagnosticCode, DiagnosticLevel}; use crate::events::Event; #[derive(Debug, Default)] pub struct EventDeserializer { pub event: Option, pub diagnostics: Vec, } impl EventDeserializer { pub fn new() -> Self { Self::default() } fn add_diagnostic(&mut self, level: DiagnosticLevel, code: DiagnosticCode, message: String) { self.diagnostics.push(Diagnostic::new(level, code, message)); } } impl<'de> Deserialize<'de> for EventDeserializer { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer.deserialize_seq(EventVisitor::new()) } } struct EventVisitor { deserializer: EventDeserializer, } impl EventVisitor { fn new() -> Self { Self { deserializer: EventDeserializer::new(), } } } impl<'de> Visitor<'de> for EventVisitor { type Value = EventDeserializer; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("an array representing an event") } fn visit_seq(mut self, mut seq: A) -> Result where A: SeqAccess<'de>, { let mut elements: Vec = Vec::new(); while let Some(elem) = seq.next_element::()? 
{ elements.push(elem); } if elements.is_empty() { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, "I found an empty array, but events must have at least a string type field as first element.".to_string(), ); return Ok(self.deserializer); } let event_type = match elements[0].as_str() { Some(t) => t, None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the first element of an event to be a string event type.".to_string(), ); return Ok(self.deserializer); } }; match event_type { "observe" => { if elements.len() != 4 { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, format!("I expected an observe event to have 4 fields, but found {}.", elements.len()), ); return Ok(self.deserializer); } let id = match elements[1].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the observation ID to be a string.".to_string(), ); return Ok(self.deserializer); } }; let timestamp = match elements[2].as_str() { Some(s) => match s.parse::>() { Ok(dt) => dt, Err(_) => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(), ); return Ok(self.deserializer); } }, None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the timestamp to be a string.".to_string(), ); return Ok(self.deserializer); } }; let change_count = match elements[3].as_u64() { Some(n) => n as usize, None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the change count to be a non-negative integer.".to_string(), ); return Ok(self.deserializer); } }; self.deserializer.event = Some(Event::Observe { observation_id: id, timestamp, change_count, }); } "add" => { if 
elements.len() != 4 { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, format!("I expected an add event to have 4 fields, but found {}.", elements.len()), ); return Ok(self.deserializer); } let path = match elements[1].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the path to be a string.".to_string(), ); return Ok(self.deserializer); } }; let value = elements[2].clone(); let observation_id = match elements[3].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the observation ID to be a string.".to_string(), ); return Ok(self.deserializer); } }; self.deserializer.event = Some(Event::Add { path, value, observation_id, }); } "change" => { if elements.len() != 4 { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, format!("I expected a change event to have 4 fields, but found {}.", elements.len()), ); return Ok(self.deserializer); } let path = match elements[1].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the path to be a string.".to_string(), ); return Ok(self.deserializer); } }; let new_value = elements[2].clone(); let observation_id = match elements[3].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the observation ID to be a string.".to_string(), ); return Ok(self.deserializer); } }; self.deserializer.event = Some(Event::Change { path, new_value, observation_id, }); } "remove" => { if elements.len() != 3 { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, format!("I expected a remove event to have 3 fields, but found {}.", elements.len()), ); 
return Ok(self.deserializer); } let path = match elements[1].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the path to be a string.".to_string(), ); return Ok(self.deserializer); } }; let observation_id = match elements[2].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the observation ID to be a string.".to_string(), ); return Ok(self.deserializer); } }; self.deserializer.event = Some(Event::Remove { path, observation_id, }); } "move" => { if elements.len() != 4 { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, format!("I expected a move event to have 4 fields, but found {}.", elements.len()), ); return Ok(self.deserializer); } let path = match elements[1].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the path to be a string.".to_string(), ); return Ok(self.deserializer); } }; let moves = match self.parse_moves(&elements[2]) { Ok(moves) => moves, Err(err_msg) => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, err_msg, ); return Ok(self.deserializer); } }; let observation_id = match elements[3].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the observation ID to be a string.".to_string(), ); return Ok(self.deserializer); } }; self.deserializer.event = Some(Event::Move { path, moves, observation_id, }); } "snapshot" => { if elements.len() != 4 { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldCount, format!("I expected a snapshot event to have 4 fields, but found {}.", elements.len()), ); return Ok(self.deserializer); } let observation_id = 
match elements[1].as_str() { Some(s) => s.to_string(), None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the observation ID to be a string.".to_string(), ); return Ok(self.deserializer); } }; let timestamp = match elements[2].as_str() { Some(s) => match s.parse::>() { Ok(dt) => dt, Err(_) => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the timestamp to be a valid ISO-8601 datetime string.".to_string(), ); return Ok(self.deserializer); } }, None => { self.deserializer.add_diagnostic( DiagnosticLevel::Fatal, DiagnosticCode::WrongFieldType, "I expected the timestamp to be a string.".to_string(), ); return Ok(self.deserializer); } }; let object = elements[3].clone(); self.deserializer.event = Some(Event::Snapshot { observation_id, timestamp, object, }); } _ => { self.deserializer.add_diagnostic( DiagnosticLevel::Warning, DiagnosticCode::UnknownEventType, format!("I found an unknown event type: '{}'", event_type), ); } } Ok(self.deserializer) } } impl EventVisitor { fn parse_moves(&mut self, moves_value: &Value) -> Result, String> { let moves_array = match moves_value.as_array() { Some(arr) => arr, None => { return Err("I expected the moves to be an array of [from, to] pairs.".to_string()); } }; let mut moves = Vec::new(); for move_pair in moves_array { let pair = match move_pair.as_array() { Some(p) if p.len() == 2 => p, _ => { return Err("I expected each move to be a [from, to] pair.".to_string()); } }; let from_idx = match pair[0].as_u64() { Some(i) => i as usize, None => { return Err("I expected the 'from' index to be a non-negative integer.".to_string()); } }; let to_idx = match pair[1].as_u64() { Some(i) => i as usize, None => { return Err("I expected the 'to' index to be a non-negative integer.".to_string()); } }; moves.push((from_idx, to_idx)); } Ok(moves) } } #[cfg(test)] mod tests { use super::*; use serde_json::json; #[test] fn 
test_deserialize_observe_event() { let json = json!(["observe", "obs-1", "2025-01-01T00:00:00Z", 1]); let result: Result = serde_json::from_value(json); assert!(result.is_ok()); let deserializer = result.unwrap(); assert!(deserializer.diagnostics.is_empty()); assert!(matches!( deserializer.event, Some(Event::Observe { observation_id, timestamp: _, change_count }) if observation_id == "obs-1" && change_count == 1 )); } #[test] fn test_deserialize_add_event() { let json = json!(["add", "/count", 42, "obs-1"]); let result: Result = serde_json::from_value(json); assert!(result.is_ok()); let deserializer = result.unwrap(); assert!(deserializer.diagnostics.is_empty()); assert!(matches!( deserializer.event, Some(Event::Add { path, value, observation_id }) if path == "/count" && value == json!(42) && observation_id == "obs-1" )); } #[test] fn test_deserialize_invalid_event_type() { let json = json!(["invalid", "some", "data"]); let result: Result = serde_json::from_value(json); assert!(result.is_ok()); let deserializer = result.unwrap(); assert_eq!(deserializer.diagnostics.len(), 1); assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::UnknownEventType); assert!(deserializer.event.is_none()); } #[test] fn test_deserialize_wrong_field_count() { let json = json!(["observe", "obs-1"]); let result: Result = serde_json::from_value(json); assert!(result.is_ok()); let deserializer = result.unwrap(); assert_eq!(deserializer.diagnostics.len(), 1); assert_eq!(deserializer.diagnostics[0].code, DiagnosticCode::WrongFieldCount); assert!(deserializer.event.is_none()); } #[test] fn test_deserialize_move_event() { let json = json!(["move", "/items", [[0, 2], [1, 0]], "obs-1"]); let result: Result = serde_json::from_value(json); assert!(result.is_ok()); let deserializer = result.unwrap(); assert!(deserializer.diagnostics.is_empty()); assert!(matches!( deserializer.event, Some(Event::Move { path, moves, observation_id }) if path == "/items" && moves == vec![(0, 2), (1, 0)] && 
observation_id == "obs-1" )); } }