feat: experiment with different implementations of LSEQ

2025-07-08 16:49:52 -07:00 · 2025-07-08 16:49:52 -07:00 · 1e45ef9314
commit 1e45ef9314
23 changed files with 3578 additions and 0 deletions
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@ -0,0 +1,21 @@
+[package]
+name = "peoplesgrocers-lseq"
+version = "1.0.0"
+edition = "2021"
+description = "L-SEQ algorithm implementation for fractional indexing and list CRDTs"
+keywords = ["lseq", "crdt", "fractional-indexing", "sequence", "collaborative-editing"]
+categories = ["data-structures", "algorithms"]
+license = "MIT"
+repository = "https://github.com/peoplesgrocers/lseq"
+readme = "README.md"
+
+[features]
+default = []
+serde = ["dep:serde"]
+
+[dependencies]
+rand = "0.8"
+serde = { version = "1.0", features = ["derive"], optional = true }
+
+[dev-dependencies]
+rand = { version = "0.8", features = ["small_rng"] } 
--- a/rust/README.md
+++ b/rust/README.md
@ -0,0 +1,82 @@
+# peoplesgrocers-lseq
+
+Rust implementation of the L-SEQ algorithm for fractional indexing and list CRDTs.
+
+## Installation
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+peoplesgrocers-lseq = "1.0.0"
+```
+
+## Usage
+
+```rust
+use peoplesgrocers_lseq::{LSEQ, SortKey};
+use rand::thread_rng;
+
+// Create a new L-SEQ instance
+let mut lseq = LSEQ::new(thread_rng());
+
+// Allocate identifiers
+let id1 = lseq.alloc(None, None);           // First identifier
+let id2 = lseq.alloc(Some(&id1), None);     // After id1
+let id3 = lseq.alloc(Some(&id1), Some(&id2)); // Between id1 and id2
+
+// Sort identifiers
+let mut ids = vec![id3.clone(), id1.clone(), id2.clone()];
+ids.sort();
+println!("{:?}", ids); // [id1, id3, id2] - properly ordered
+
+// Convert to/from strings
+let key_str = id1.to_string();
+let parsed_key: SortKey = key_str.parse().unwrap();
+assert_eq!(id1, parsed_key);
+
+// Use with deterministic RNG for testing
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+
+let rng = StdRng::seed_from_u64(42);
+let mut deterministic_lseq = LSEQ::new(rng);
+```
+
+## Features
+
+- **Fractional indexing**: Generate identifiers that can be inserted between any two existing ones
+- **Serialization**: Optional serde serialization/deserialization of sort keys as strings (enable the `serde` feature)
+- **Ordering**: SortKey implements Ord and can be used directly with Rust's sorting
+- **String conversion**: Convert to/from strings for storage and transmission
+- **Even spacing**: Utilities for generating evenly distributed keys for bulk operations
+
+## API
+
+### `LSEQ<R: Rng>`
+
+#### `new(rng: R) -> Self`
+
+Creates a new L-SEQ instance with the given random number generator.
+
+#### `alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey`
+
+Allocates a new identifier between two existing identifiers.
+
+- `before`: The identifier that should come before the new one (or `None` for beginning)
+- `after`: The identifier that should come after the new one (or `None` for end)
+- Returns: A new SortKey that sorts between `before` and `after`
+
+### `SortKey`
+
+A sort key that implements `Ord` and string conversion (`Display`/`FromStr`), plus `Serialize` and `Deserialize` when the `serde` feature is enabled.
+
+### `EvenSpacingIterator`
+
+Utility for generating evenly spaced sort keys for bulk operations.
+
+## How it works
+
+L-SEQ generates identifiers using a base-64 alphabet that maintains lexicographic ordering. Each identifier is a sequence of characters from this alphabet, and new identifiers are generated by finding space between existing ones at different depths.
+
+The algorithm uses alternating allocation strategies (bias toward min or max) at different depths to avoid degenerative cases and maintain good performance characteristics. 
--- a/rust/src/lib.rs
+++ b/rust/src/lib.rs
@ -0,0 +1,382 @@
+use rand::Rng;
+#[cfg(feature = "serde")]
+use serde::{
+    de::{self, Visitor},
+    Deserialize, Serialize,
+};
+use std::error::Error;
+use std::fmt;
+use std::str::FromStr;
+
+/// The 64 characters used to render sort-key digits, indexed by digit value (0-63).
+///
+/// The bytes are listed in ascending ASCII order, so comparing encoded strings
+/// byte-wise agrees with comparing the underlying digit sequences.
+const ALPHABET: &[u8] = b"-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
+
+/// A position identifier made of a sequence of digits.
+///
+/// The derived `Ord` compares the digit vectors lexicographically. With the
+/// `serde` feature enabled, serialization goes through the `String` conversion.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+#[cfg_attr(feature = "serde", derive(Serialize))]
+#[cfg_attr(feature = "serde", serde(into = "String"))]
+pub struct SortKey {
+    // Digit values; `Display` uses each one as an index into `ALPHABET`.
+    numbers: Vec<u8>,
+}
+
+#[cfg(feature = "serde")]
+impl<'de> Deserialize<'de> for SortKey {
+    /// Deserializes a `SortKey` from its string form by delegating to `FromStr`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        /// Visitor that accepts a string and parses it into a `SortKey`.
+        struct KeyStringVisitor;
+
+        impl<'de> Visitor<'de> for KeyStringVisitor {
+            type Value = SortKey;
+
+            fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
+                out.write_str("a string containing valid sort key characters")
+            }
+
+            fn visit_str<E>(self, text: &str) -> Result<SortKey, E>
+            where
+                E: de::Error,
+            {
+                // Surface the parse failure as a serde error.
+                match text.parse::<SortKey>() {
+                    Ok(key) => Ok(key),
+                    Err(parse_err) => Err(E::custom(parse_err)),
+                }
+            }
+        }
+
+        deserializer.deserialize_str(KeyStringVisitor)
+    }
+}
+
+impl SortKey {
+    /// Wraps a raw digit vector in a `SortKey`.
+    ///
+    /// The digits are stored as given, without any validation.
+    pub fn from_numbers(numbers: Vec<u8>) -> Self {
+        Self { numbers }
+    }
+}
+
+impl From<SortKey> for Vec<u8> {
+    /// Consumes the key and hands back its digit vector.
+    fn from(key: SortKey) -> Vec<u8> {
+        let SortKey { numbers } = key;
+        numbers
+    }
+}
+
+impl From<SortKey> for String {
+    /// Renders the key through its `Display` implementation.
+    fn from(key: SortKey) -> String {
+        ToString::to_string(&key)
+    }
+}
+
+impl AsRef<[u8]> for SortKey {
+    /// Borrows the key's digits as a byte slice.
+    fn as_ref(&self) -> &[u8] {
+        self.numbers.as_slice()
+    }
+}
+
+impl From<String> for SortKey {
+    /// Parses `s` into a key.
+    ///
+    /// This conversion never fails: a string that does not parse is replaced
+    /// by the single-digit key `[0]`.
+    fn from(s: String) -> Self {
+        match s.parse::<SortKey>() {
+            Ok(key) => key,
+            Err(_) => SortKey { numbers: vec![0] },
+        }
+    }
+}
+
+impl fmt::Display for SortKey {
+    /// Writes one alphabet character per digit, in order.
+    ///
+    /// Panics if a digit is not a valid index into `ALPHABET`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.numbers
+            .iter()
+            .try_for_each(|&digit| write!(f, "{}", char::from(ALPHABET[usize::from(digit)])))
+    }
+}
+
+/// Allocator of `SortKey`s that fall between two existing keys.
+#[allow(dead_code)]
+#[derive(Debug)]
+pub struct LSEQ<R: Rng> {
+    // One entry per depth reached so far: `true` offsets the new digit upward
+    // from the lower bound, `false` offsets it downward from the upper bound.
+    strategies: Vec<bool>,
+    // Source of randomness for strategy choices and digit offsets.
+    rng: R,
+}
+
+#[allow(dead_code)]
+impl<R: Rng> LSEQ<R> {
+    /// Builds an allocator around `rng`, drawing the strategy for depth 0 up front.
+    pub fn new(mut rng: R) -> Self {
+        let first_strategy = rng.gen_bool(0.5);
+        LSEQ {
+            strategies: vec![first_strategy],
+            rng,
+        }
+    }
+
+    /// Allocates a key positioned after `before` and ahead of `after`.
+    ///
+    /// `None` stands for the start (`before`) or the end (`after`) of the
+    /// sequence. The bounds are not validated; callers are expected to pass
+    /// `before < after`.
+    pub fn alloc(&mut self, before: Option<&SortKey>, after: Option<&SortKey>) -> SortKey {
+        // Missing bounds fall back to the boundary digits 0 and 63.
+        const LOWEST: &[u8] = &[0];
+        const HIGHEST: &[u8] = &[63];
+        let lower = before.map_or(LOWEST, |key| key.numbers.as_slice());
+        let upper = after.map_or(HIGHEST, |key| key.numbers.as_slice());
+
+        let mut prefix = Vec::new();
+        for depth in 0.. {
+            // Past the end of a bound, pad with the smallest / largest digit.
+            let lo = lower.get(depth).copied().unwrap_or(0);
+            let hi = upper.get(depth).copied().unwrap_or(63);
+            let gap = i32::from(hi) - i32::from(lo);
+
+            if gap > 1 {
+                // There is at least one free digit strictly between lo and hi:
+                // pick an offset in 1..gap and apply it from the side chosen
+                // by this depth's strategy.
+                let offset = 1 + self.rng.gen_range(0..gap - 1) as u8;
+                let digit = if self.strategies[depth] {
+                    lo + offset
+                } else {
+                    hi - offset
+                };
+                prefix.push(digit);
+                return SortKey::from_numbers(prefix);
+            }
+
+            // No room at this depth: keep the lower bound's digit and descend,
+            // drawing a strategy for the next depth if none exists yet.
+            prefix.push(lo);
+            if depth + 1 >= self.strategies.len() {
+                self.strategies.push(self.rng.gen_bool(0.5));
+            }
+        }
+        unreachable!("the depth range is unbounded")
+    }
+}
+
+/// Error returned by `EvenSpacingIterator::new`.
+#[derive(Debug)]
+pub enum SpacingError {
+    /// The requested item count cannot be served: it is either zero or larger
+    /// than the biggest supported key space.
+    TooManyItems,
+}
+
+impl fmt::Display for SpacingError {
+    /// Writes the human-readable message for the error variant.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let message = match self {
+            SpacingError::TooManyItems => "Too many items to allocate",
+        };
+        f.write_str(message)
+    }
+}
+
+// Marker impl: `Debug` and `Display` above supply everything `Error` needs.
+impl Error for SpacingError {}
+
+/// Iterator yielding numeric positions spread across a key space.
+///
+/// The step between positions is kept as an integer part plus a fractional
+/// part; the fraction is accumulated and turned into an extra +1 step whenever
+/// it reaches 1.0.
+#[derive(Debug, Clone)]
+pub struct EvenSpacingIterator {
+    remaining_items: usize, // Positions still to be yielded
+    space_size: u64,        // Largest position that may be yielded
+    next_item: u64,         // Position returned by the next call to `next`
+    step_size_integer: u64, // Integer part of step size
+    step_size_error: f64,   // Fractional part of step size
+    error_accumulator: f64, // Accumulated error
+}
+
+impl EvenSpacingIterator {
+    // Usable position counts (64^k - 2) for key lengths k = 1..=9.
+    // Two positions are subtracted from each space because the lowest position
+    // (0) and the highest position (64^k - 1) are never handed out: the
+    // iterator starts at 1 and stops once it passes the table value. Keeping
+    // both ends free leaves room to insert before the first and after the last
+    // generated key.
+    //
+    // The table is u64 rather than usize: entries from 64^6 upward do not fit
+    // in a 32-bit usize and would fail to compile on such targets.
+    const USABLE_SPACE: [u64; 9] = [
+        64 - 2,                // 64^1 - 2
+        4096 - 2,              // 64^2 - 2
+        262144 - 2,            // 64^3 - 2
+        16777216 - 2,          // 64^4 - 2
+        1073741824 - 2,        // 64^5 - 2
+        68719476736 - 2,       // 64^6 - 2
+        4398046511104 - 2,     // 64^7 - 2
+        281474976710656 - 2,   // 64^8 - 2
+        18014398509481984 - 2, // 64^9 - 2
+    ];
+
+    /// Plans `total_items` evenly spaced positions.
+    ///
+    /// Returns `(k, iterator)`, where `k` is the key length (in digits) of the
+    /// smallest key space able to hold `total_items` positions and the
+    /// iterator yields those positions. Pass each position together with `k`
+    /// to [`EvenSpacingIterator::position_to_key`] to obtain the key.
+    ///
+    /// # Errors
+    ///
+    /// Returns `SpacingError::TooManyItems` when `total_items` is zero or does
+    /// not fit in the largest supported space (64^9 - 2 positions).
+    pub fn new(total_items: usize) -> Result<(u64, Self), SpacingError> {
+        if total_items == 0 {
+            return Err(SpacingError::TooManyItems);
+        }
+        let wanted = u64::try_from(total_items).map_err(|_| SpacingError::TooManyItems)?;
+
+        // Smallest k whose usable space (64^k - 2) holds every requested item.
+        let (index, space_size) = Self::USABLE_SPACE
+            .iter()
+            .copied()
+            .enumerate()
+            .find(|&(_, size)| size >= wanted)
+            .ok_or(SpacingError::TooManyItems)?;
+        let k = index as u64 + 1; // k is 1-indexed
+
+        // Calculate step size split into integer and fractional parts
+        let step_size = (space_size as f64) / (total_items as f64);
+        let step_size_integer = step_size.floor() as u64;
+        let step_size_error = step_size - step_size_integer as f64;
+
+        Ok((
+            k,
+            EvenSpacingIterator {
+                remaining_items: total_items,
+                space_size,
+                next_item: 1,
+                step_size_integer,
+                step_size_error,
+                error_accumulator: 0.0,
+            },
+        ))
+    }
+
+    /// Converts a numeric position into a `k`-digit sort key.
+    ///
+    /// The position is written in base 64, most significant digit first and
+    /// padded with leading zero digits. Digits beyond the `k` lowest are
+    /// dropped, so `position` should be below 64^k.
+    pub fn position_to_key(k: u64, position: u64) -> SortKey {
+        const BASE: u64 = 64;
+        let mut digits = Vec::with_capacity(k as usize);
+        let mut rest = position;
+
+        // Fill in digits from least significant to most significant
+        for _ in 0..k {
+            // `rest % BASE` is always in 0..=63, so the cast cannot truncate
+            // and the digit is a valid index into the 64-character alphabet.
+            digits.push((rest % BASE) as u8);
+            rest /= BASE;
+        }
+
+        // Reverse to get most significant digit first
+        digits.reverse();
+        SortKey::from_numbers(digits)
+    }
+}
+
+impl Iterator for EvenSpacingIterator {
+    type Item = u64;
+
+    /// Yields the current position and advances by one step.
+    ///
+    /// Iteration ends when all requested items have been produced or the next
+    /// position would exceed the usable space.
+    fn next(&mut self) -> Option<Self::Item> {
+        let exhausted = self.remaining_items == 0 || self.next_item > self.space_size;
+        if exhausted {
+            return None;
+        }
+
+        let emitted = self.next_item;
+        self.remaining_items -= 1;
+
+        // Accumulate the fractional part of the step; once it reaches a whole
+        // unit, carry it into this step as one extra position.
+        self.error_accumulator += self.step_size_error;
+        let carry = if self.error_accumulator >= 1.0 {
+            self.error_accumulator -= 1.0;
+            1
+        } else {
+            0
+        };
+        self.next_item += self.step_size_integer + carry;
+
+        Some(emitted)
+    }
+}
+
+/// Error produced when a string cannot be parsed into a `SortKey`.
+#[derive(Debug)]
+pub enum SortKeyParseError {
+    /// The input contained a character outside the sort-key alphabet.
+    InvalidCharacter(char),
+}
+
+impl fmt::Display for SortKeyParseError {
+    /// Writes the reported character together with the full expected alphabet.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // The enum has a single variant, so this pattern is irrefutable.
+        let SortKeyParseError::InvalidCharacter(bad) = self;
+        let alphabet = String::from_utf8_lossy(ALPHABET);
+        write!(
+            f,
+            "Invalid character '{}' in sort key. Expected characters from alphabet: {}",
+            bad, alphabet
+        )
+    }
+}
+
+// Marker impl: `Debug` and `Display` above supply everything `Error` needs.
+impl Error for SortKeyParseError {}
+
+impl FromStr for SortKey {
+    type Err = SortKeyParseError;
+
+    /// Parses a key from its string form, one alphabet character per digit.
+    ///
+    /// An empty string parses to a key with no digits.
+    ///
+    /// # Errors
+    ///
+    /// Returns `SortKeyParseError::InvalidCharacter` carrying the first
+    /// character of `s` that is not part of `ALPHABET`.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let numbers = s
+            .chars()
+            .map(|c| {
+                ALPHABET
+                    .iter()
+                    .position(|&symbol| char::from(symbol) == c)
+                    // The alphabet has 64 entries, so the index always fits in a u8.
+                    .map(|index| index as u8)
+                    // Report the character that actually failed the lookup,
+                    // not whatever happens to be first in the input.
+                    .ok_or(SortKeyParseError::InvalidCharacter(c))
+            })
+            .collect::<Result<Vec<u8>, _>>()?;
+        Ok(SortKey { numbers })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::rngs::StdRng;
+    use rand::SeedableRng;
+
+    /// Single-character keys compare in alphabet order.
+    #[test]
+    fn test_compare_lseq() {
+        let a: SortKey = "a".parse().unwrap();
+        let b: SortKey = "b".parse().unwrap();
+        assert!(a < b);
+        assert!(!(b < a));
+        assert!(!(a < a));
+    }
+
+    /// Keys allocated at the end and in the middle respect their bounds.
+    #[test]
+    fn test_lseq_alloc() {
+        // Seeded RNG keeps the test deterministic.
+        let mut lseq = LSEQ::new(StdRng::seed_from_u64(42));
+        let first = lseq.alloc(None, None);
+        let last = lseq.alloc(Some(&first), None);
+        let middle = lseq.alloc(Some(&first), Some(&last));
+
+        assert!(first < last);
+        assert!(first < middle);
+        assert!(middle < last);
+    }
+
+    /// Position 1 in a two-digit space is rendered with a leading zero digit.
+    #[test]
+    fn test_position_to_key() {
+        const K: u64 = 2;
+        let key = EvenSpacingIterator::position_to_key(K, 1);
+        assert_eq!(key.to_string(), "-0");
+    }
+
+    /// 4093 items need a two-digit space and every one of them is produced.
+    #[test]
+    fn test_even_spacing_4093() {
+        let (k, mut iter) = EvenSpacingIterator::new(4093).unwrap();
+        assert_eq!(k, 2);
+
+        // Borrow the iterator so its final state can still be printed below.
+        let positions: Vec<u64> = iter.by_ref().collect();
+        println!("{:?}", iter);
+
+        assert_eq!(positions.len(), 4093);
+    }
+
+    /// Six items are all produced; progress is logged to stderr.
+    #[test]
+    fn test_even_spacing_6() {
+        let (k, mut iter) = EvenSpacingIterator::new(6).unwrap();
+        eprintln!("Created iterator with k={}", k);
+
+        let mut positions = Vec::new();
+        for (index, pos) in iter.by_ref().enumerate() {
+            eprintln!("Iteration {}: Got position {}", index + 1, pos);
+            positions.push(pos);
+        }
+        eprintln!("Final iterator state: {:?}", iter);
+
+        assert_eq!(
+            positions.len(),
+            6,
+            "Expected 6 positions, got {}",
+            positions.len()
+        );
+    }
+}